coolfrxcrazy committed
Commit e1ff842 · verified · 1 Parent(s): 231c02f

Update app.py

Files changed (1)
  1. app.py +13 -11
app.py CHANGED
@@ -178,18 +178,20 @@ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 def ocr(image):
     tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
     model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True, low_cpu_mem_usage=True, use_safetensors=True, pad_token_id=tokenizer.eos_token_id).to(device)
-    if isinstance(image, str):
-        image = Image.open(image)
+    # Check if the input is a numpy array and convert to PIL Image
+    if isinstance(image, np.ndarray):
+        image = Image.fromarray(image)
+    # Ensure the input is a PIL Image
+    elif not isinstance(image, Image.Image):
+        raise ValueError("Input must be a numpy.ndarray or a PIL.Image.")

-    # Save the image to a temporary file
-    temp_image_path = "temp_image.jpg"
-    image.save(temp_image_path, format='JPEG')
+    # Save the image to a BytesIO stream
+    image_bytes = io.BytesIO()
+    image.save(image_bytes, format='JPEG')
+    image_bytes.seek(0) # Move the cursor to the start of the stream

-    # Perform OCR on the image using the file path
-    res = model.chat(tokenizer, temp_image_path, ocr_type='ocr')
-
-    # Clean up the temporary file
-    os.remove(temp_image_path)
+    # Perform OCR on the image using the BytesIO stream
+    res = model.chat(tokenizer, image_bytes, ocr_type='ocr') # Check if the model supports BytesIO input

     # Return the extracted text
     return res
@@ -217,7 +219,7 @@ iface_out = gr.Interface(
 iface_ocr = gr.Interface(
     fn=ocr,
     inputs=gr.Image(type="numpy", label="Upload Image"),
-    outputs="text",
+    outputs=gr.Textbox(label="Extracted Text"),
     api_name="ocr", # This explicitly sets the api_name
     title="OCR Image Text Extraction",
     description="Upload an image and extract text using the OCR model."