|
import streamlit as st |
|
from PIL import Image |
|
import numpy as np |
|
import easyocr |
|
import pandas as pd |
|
import base64 |
|
import re |
|
from datetime import datetime, timedelta |
|
|
|
# Identity number in the form 12345-1234567-1 (anchored at the start of the text).
_IDENTITY_NUMBER_RE = re.compile(r'\d{5}-\d{7}-\d')
# Dates in the form DD.MM.YYYY (anchored at the start of the text).
_DATE_RE = re.compile(r'\d{2}\.\d{2}\.\d{4}')
_DATE_FORMAT = "%d.%m.%Y"
# Date fields are filled in this order as date-shaped detections are encountered.
_DATE_FIELDS = ("Date of Birth", "Date of Issue", "Date of Expiry")
# Years added to the issue date when no expiry date was read from the image.
_DEFAULT_VALIDITY_YEARS = 10


def _add_years(date, years):
    """Return *date* moved forward by *years*, mapping 29 Feb to 28 Feb if needed."""
    try:
        return date.replace(year=date.year + years)
    except ValueError:
        # 29 February does not exist in a non-leap target year; clamp to the 28th.
        # A year past datetime's supported range still raises ValueError here.
        return date.replace(year=date.year + years, day=28)


def process_image(image):
    """Run OCR on an ID-card image and pick out the known fields.

    The image is converted to a NumPy array and passed to an English-only,
    CPU-only EasyOCR reader. The detections are then scanned in the order
    the reader returns them:

    * a detection containing "father" labels the *next* detection as the
      father's name; any other detection containing "name" labels the next
      detection as the holder's name;
    * a detection that is exactly "m" or "f" (case-insensitive) is the gender;
    * a detection starting with ``ddddd-ddddddd-d`` is the identity number;
    * detections starting with ``DD.MM.YYYY`` fill, in order, the date of
      birth, the date of issue and the date of expiry.

    If an issue date was found but no expiry date, the expiry is derived as
    the issue date plus ten years.

    Args:
        image: The image to read; anything ``np.array`` can turn into an
            array that EasyOCR accepts (the page in this file passes a PIL
            image).

    Returns:
        dict: Keys "Name", "Father Name", "Gender", "Country of Stay",
        "Identity Number", "Date of Birth", "Date of Issue" and
        "Date of Expiry". Fields that were not recognised are ``None``;
        "Country of Stay" is always "Pakistan".
    """
    reader = easyocr.Reader(['en'], gpu=False)
    detections = reader.readtext(np.array(image))

    extracted_data = {
        "Name": None,
        "Father Name": None,
        "Gender": None,
        "Country of Stay": "Pakistan",
        "Identity Number": None,
        "Date of Birth": None,
        "Date of Issue": None,
        "Date of Expiry": None,
    }

    for index, detection in enumerate(detections):
        # Each detection is indexed as (bounding box, text, ...); only the text is used.
        text = detection[1].strip()
        lowered = text.lower()

        if "father" in lowered or "name" in lowered:
            # The value of a label is taken from the detection right after it.
            has_next = index + 1 < len(detections)
            value = detections[index + 1][1].strip() if has_next else None
            # "father" wins so that a "Father Name" label is not mistaken for "Name".
            key = "Father Name" if "father" in lowered else "Name"
            extracted_data[key] = value
            continue

        if lowered in ("m", "f"):
            extracted_data["Gender"] = text.upper()
            continue

        id_match = _IDENTITY_NUMBER_RE.match(text)
        if id_match:
            # Keep only the matched span so trailing OCR noise is not stored.
            extracted_data["Identity Number"] = id_match.group()
            continue

        date_match = _DATE_RE.match(text)
        if date_match:
            for field in _DATE_FIELDS:
                if extracted_data[field] is None:
                    extracted_data[field] = date_match.group()
                    break

    if extracted_data["Date of Issue"] and not extracted_data["Date of Expiry"]:
        try:
            issued_on = datetime.strptime(extracted_data["Date of Issue"], _DATE_FORMAT)
            expires_on = _add_years(issued_on, _DEFAULT_VALIDITY_YEARS)
            extracted_data["Date of Expiry"] = expires_on.strftime(_DATE_FORMAT)
        except ValueError:
            # Best effort: the text was date-shaped but not a real calendar
            # date (e.g. "45.13.2020"), so the expiry is left unset.
            pass

    return extracted_data
|
|
|
def display_table(extracted_data):
    """Show the extracted fields as a two-column Streamlit table.

    Args:
        extracted_data: Mapping that contains every field listed below;
            a missing key raises ``KeyError``. Falsy values (such as
            ``None``) are shown as an empty string.
    """
    field_order = (
        "Name",
        "Father Name",
        "Gender",
        "Country of Stay",
        "Identity Number",
        "Date of Birth",
        "Date of Issue",
        "Date of Expiry",
    )
    # One (field, value) row per field, in the fixed display order above.
    rows = [(label, extracted_data[label] or "") for label in field_order]
    table = pd.DataFrame(rows, columns=['Field', 'Value'])
    st.dataframe(table)
|
|
|
def get_csv_download_link(df):
    """Build an HTML anchor that downloads *df* as a CSV file.

    The frame is serialised without its index, base64-encoded and embedded
    in a ``data:`` URL, so no file is written anywhere. The returned markup
    is raw HTML and has to be rendered as such by the caller.

    Args:
        df: The pandas DataFrame to export.

    Returns:
        str: An ``<a>`` element whose ``download`` attribute suggests the
        file name ``extracted_data.csv``.
    """
    csv_text = df.to_csv(index=False)
    encoded = base64.b64encode(csv_text.encode()).decode()
    # "text/csv" is the registered media type for CSV; "file/csv" is not a
    # valid type.
    return (
        f'<a href="data:text/csv;base64,{encoded}" '
        'download="extracted_data.csv">Download CSV File</a>'
    )
|
|
|
def data_extraction_page():
    """Render the Streamlit page that extracts text from an uploaded ID card.

    Shows a title and a file uploader limited to jpg/jpeg/png. Once a file
    is uploaded, the image is displayed, passed to ``process_image``, and
    the result is shown as a table followed by a CSV download link.
    """
    st.title('ID Card Text Extraction')

    upload = st.file_uploader(
        "Upload an image of your ID card to Extract Data",
        type=["jpg", "jpeg", "png"],
    )
    # Nothing more to render until the user has picked a file.
    if upload is None:
        return

    card_image = Image.open(upload)
    st.image(
        card_image,
        caption='Wait...! We Are Extracting Data For You',
        use_column_width=True,
    )

    fields = process_image(card_image)
    display_table(fields)

    # The link is raw HTML, so markdown must be told to render it unescaped.
    export_frame = pd.DataFrame(list(fields.items()), columns=['Field', 'Value'])
    download_link = get_csv_download_link(export_frame)
    st.markdown(download_link, unsafe_allow_html=True)
|
|