mkoot007 committed on
Commit
d45b0ff
·
1 Parent(s): 196be8d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -20
app.py CHANGED
@@ -13,24 +13,14 @@ explainer = AutoModelForSequenceClassification.from_pretrained("gpt2")
13
  def extract_text(image):
14
  return ocr_reader.readtext(image)
15
 
 
 
 
 
 
16
  # Define a function to explain the extracted text
17
  def explain_text(text):
18
- tokenizer = AutoTokenizer.from_pretrained("gpt2")
19
-
20
- # Set pad_token to eos_token (end of sequence token)
21
- tokenizer.pad_token = tokenizer.eos_token
22
-
23
- # Convert the text to a string if necessary
24
- if not isinstance(text, str):
25
- text = str(text)
26
-
27
- # Encode the text and convert to PyTorch tensors
28
- inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
29
-
30
- input_ids = inputs["input_ids"]
31
- attention_mask = inputs["attention_mask"]
32
-
33
- explanation = explainer(input_ids, attention_mask=attention_mask)
34
  return explanation
35
 
36
  # Create a Streamlit layout
@@ -44,11 +34,9 @@ if uploaded_file is not None:
44
  # Read the uploaded image
45
  image = Image.open(uploaded_file)
46
 
47
- # Process the image and convert to NumPy array if necessary
48
- # image = process_image(image)
49
-
50
  # Extract text from the image
51
- extracted_text = extract_text(image)
 
52
 
53
  # Explain the extracted text
54
  explanation = explain_text(extracted_text)
 
13
  def extract_text(image):
14
  return ocr_reader.readtext(image)
15
 
16
+ # Define a function to process OCR results and extract actual text
17
+ def process_ocr_results(ocr_results):
18
+ extracted_text = " ".join([res[1] for res in ocr_results])
19
+ return extracted_text
20
+
21
  # Define a function to explain the extracted text
22
  def explain_text(text):
23
+ explanation = "The extracted text is: " + text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  return explanation
25
 
26
  # Create a Streamlit layout
 
34
  # Read the uploaded image
35
  image = Image.open(uploaded_file)
36
 
 
 
 
37
  # Extract text from the image
38
+ ocr_results = extract_text(image)
39
+ extracted_text = process_ocr_results(ocr_results)
40
 
41
  # Explain the extracted text
42
  explanation = explain_text(extracted_text)