MNGames committed on
Commit
af290e7
1 Parent(s): 988049d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -10
app.py CHANGED
@@ -1,6 +1,5 @@
1
  import gradio as gr
2
  import torch
3
- import cv2
4
  import pytesseract
5
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
6
 
@@ -13,12 +12,10 @@ classification_model_name = "distilbert-base-uncased"
13
  classification_tokenizer = AutoTokenizer.from_pretrained(classification_model_name)
14
  classification_model = AutoModelForSequenceClassification.from_pretrained(classification_model_name)
15
 
16
- # Define function for OCR
17
  def perform_ocr(image):
18
- # Convert image to grayscale
19
- gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
20
- # Perform OCR
21
- text = pytesseract.image_to_string(gray_image)
22
  return text
23
 
24
  # Define function to process video and predict
@@ -36,15 +33,12 @@ def predict_runner_status(video_file):
36
  results_detection = model(frame)
37
 
38
  # Logic for determining runner status using detected objects
39
- # Example: if person detected, extract text and classify
40
  objects = results_detection.pred[0][:, -1].numpy()
41
  if 0 in objects: # 0 corresponds to person class
42
- # Get the cropped region containing the person for OCR
43
  person_bbox = results_detection.pred[0][np.where(objects == 0)][0][:4]
44
  person_bbox = person_bbox.astype(int)
45
  person_img = frame[person_bbox[1]:person_bbox[3], person_bbox[0]:person_bbox[2]]
46
-
47
- # Perform OCR on the cropped image
48
  text = perform_ocr(person_img)
49
 
50
  # Classification using text classification model
 
1
  import gradio as gr
2
  import torch
 
3
  import pytesseract
4
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
5
 
 
12
  classification_tokenizer = AutoTokenizer.from_pretrained(classification_model_name)
13
  classification_model = AutoModelForSequenceClassification.from_pretrained(classification_model_name)
14
 
15
+ # Define function for text recognition (OCR)
16
  def perform_ocr(image):
17
+ # Perform OCR using pytesseract
18
+ text = pytesseract.image_to_string(image)
 
 
19
  return text
20
 
21
  # Define function to process video and predict
 
33
  results_detection = model(frame)
34
 
35
  # Logic for determining runner status using detected objects
 
36
  objects = results_detection.pred[0][:, -1].numpy()
37
  if 0 in objects: # 0 corresponds to person class
38
+ # Perform OCR on the detected person
39
  person_bbox = results_detection.pred[0][np.where(objects == 0)][0][:4]
40
  person_bbox = person_bbox.astype(int)
41
  person_img = frame[person_bbox[1]:person_bbox[3], person_bbox[0]:person_bbox[2]]
 
 
42
  text = perform_ocr(person_img)
43
 
44
  # Classification using text classification model