Update app.py
app.py
CHANGED
@@ -178,18 +178,20 @@ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 def ocr(image):
     tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
     model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True, low_cpu_mem_usage=True, use_safetensors=True, pad_token_id=tokenizer.eos_token_id).to(device)
+    # Check if the input is a numpy array and convert to PIL Image
+    if isinstance(image, np.ndarray):
+        image = Image.fromarray(image)
+    # Ensure the input is a PIL Image
+    elif not isinstance(image, Image.Image):
+        raise ValueError("Input must be a numpy.ndarray or a PIL.Image.")
 
-    # Save the image to a temporary file
-    image.save(temp_image_path)
+    # Save the image to a BytesIO stream
+    image_bytes = io.BytesIO()
+    image.save(image_bytes, format='JPEG')
+    image_bytes.seek(0)  # Move the cursor to the start of the stream
 
-    # Perform OCR on the image using the temporary file
-    res = model.chat(tokenizer, temp_image_path, ocr_type='ocr')
-    # Clean up the temporary file
-    os.remove(temp_image_path)
+    # Perform OCR on the image using the BytesIO stream
+    res = model.chat(tokenizer, image_bytes, ocr_type='ocr')  # Check if the model supports BytesIO input
 
     # Return the extracted text
     return res
@@ -217,7 +219,7 @@ iface_out = gr.Interface(
 iface_ocr = gr.Interface(
     fn=ocr,
     inputs=gr.Image(type="numpy", label="Upload Image"),
-    outputs="
+    outputs=gr.Textbox(label="Extracted Text"),
     api_name="ocr",  # This explicitly sets the api_name
     title="OCR Image Text Extraction",
     description="Upload an image and extract text using the OCR model."
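Because api_name="ocr" is set explicitly, the endpoint can be exercised programmatically once the Space is running. A rough client-side sketch, assuming a recent gradio_client release; the Space id and image path below are placeholders:

from gradio_client import Client, handle_file

# Placeholder Space id; replace with the actual Space, e.g. "username/space-name"
client = Client("username/space-name")

# Calls the interface registered with api_name="ocr"; the image is sent
# as a file and the extracted text comes back as a string.
result = client.predict(
    handle_file("sample.jpg"),  # placeholder local image path
    api_name="/ocr",
)
print(result)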
|