File size: 3,023 Bytes
60a5e14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import streamlit as st
from PIL import Image
import numpy as np
import easyocr
import pandas as pd
import base64
import re
from datetime import datetime, timedelta

# Identity number shaped like 12345-1234567-1 (5 digits, 7 digits, 1 digit).
_IDENTITY_NUMBER_RE = re.compile(r'\d{5}-\d{7}-\d')
# Dates shaped like DD.MM.YYYY.
_CARD_DATE_RE = re.compile(r'\d{2}\.\d{2}\.\d{4}')
_CARD_DATE_FORMAT = "%d.%m.%Y"
# Validity period applied when no expiry date was read from the card.
_DEFAULT_VALIDITY_YEARS = 10
# Date fields, filled in the order in which dates are encountered.
_DATE_FIELDS = ("Date of Birth", "Date of Issue", "Date of Expiry")


def _default_expiry(issue_text):
    """Return the issue date plus the default validity, or None if unusable.

    The result is formatted as DD.MM.YYYY. An issue date of 29 February is
    mapped to 28 February of the target year when that year is not a leap
    year, instead of being dropped.
    """
    try:
        issued = datetime.strptime(issue_text, _CARD_DATE_FORMAT)
        target_year = issued.year + _DEFAULT_VALIDITY_YEARS
        try:
            expiry = issued.replace(year=target_year)
        except ValueError:
            # 29 Feb has no counterpart in a non-leap target year.
            expiry = issued.replace(year=target_year, day=28)
    except ValueError:
        # Not a real calendar date (e.g. "99.99.2020") or year out of range:
        # keep the best-effort behaviour and report no expiry.
        return None
    return expiry.strftime(_CARD_DATE_FORMAT)


def _extract_fields(texts):
    """Map OCR text fragments, in reading order, onto the ID-card fields.

    A fragment containing "father" marks the *next* fragment as the father's
    name; any other fragment containing "name" marks the next fragment as the
    holder's name. A lone "M"/"F" is the gender. Fragments that start with an
    identity-number or date pattern contribute only the matched portion, so
    trailing OCR noise is not stored.
    """
    fields = {
        "Name": None,
        "Father Name": None,
        "Gender": None,
        "Country of Stay": "Pakistan",
        "Identity Number": None,
        "Date of Birth": None,
        "Date of Issue": None,
        "Date of Expiry": None,
    }

    for index, raw in enumerate(texts):
        text = raw.strip()
        lowered = text.lower()

        if "father" in lowered or "name" in lowered:
            # The value is taken from the fragment after the label, if any.
            has_next = index + 1 < len(texts)
            value = texts[index + 1].strip() if has_next else None
            key = "Father Name" if "father" in lowered else "Name"
            fields[key] = value
            continue

        if lowered in ("m", "f"):
            fields["Gender"] = text.upper()
            continue

        id_match = _IDENTITY_NUMBER_RE.match(text)
        if id_match:
            fields["Identity Number"] = id_match.group()
            continue

        date_match = _CARD_DATE_RE.match(text)
        if date_match:
            # Assumes dates are detected in the order birth, issue, expiry.
            # NOTE(review): confirm against real card layouts.
            for key in _DATE_FIELDS:
                if fields[key] is None:
                    fields[key] = date_match.group()
                    break

    if fields["Date of Issue"] and not fields["Date of Expiry"]:
        fields["Date of Expiry"] = _default_expiry(fields["Date of Issue"])

    return fields


def process_image(image):
    """Run OCR on an ID-card image and return the recognised fields.

    Args:
        image: Anything ``np.array`` can convert to an image array; the
            caller in this file passes a PIL image.

    Returns:
        A dict with the keys "Name", "Father Name", "Gender",
        "Country of Stay" (always "Pakistan"), "Identity Number",
        "Date of Birth", "Date of Issue" and "Date of Expiry". Fields that
        could not be read are None. When no expiry date is found on the card
        it is derived as the issue date plus ten years.
    """
    # NOTE(review): a new reader is built on every call; if that proves slow,
    # consider caching it at the application level.
    reader = easyocr.Reader(['en'], gpu=False)
    detections = reader.readtext(np.array(image))
    # Element 1 of each detection is used as the recognised text.
    return _extract_fields([detection[1] for detection in detections])

def display_table(extracted_data):
    """Show the extracted ID-card fields as a two-column Streamlit table.

    Args:
        extracted_data: Mapping that contains every field listed below; a
            missing key raises KeyError. Falsy values (e.g. None) are shown
            as an empty string.
    """
    ordered_fields = (
        "Name",
        "Father Name",
        "Gender",
        "Country of Stay",
        "Identity Number",
        "Date of Birth",
        "Date of Issue",
        "Date of Expiry",
    )

    rows = []
    for field in ordered_fields:
        value = extracted_data[field]
        # Blank out anything falsy so the table never shows "None".
        rows.append((field, value or ""))

    table = pd.DataFrame(rows, columns=['Field', 'Value'])
    st.dataframe(table)

def get_csv_download_link(df):
    """Return an HTML anchor that downloads *df* as ``extracted_data.csv``.

    The frame is serialised with ``to_csv(index=False)``, UTF-8 encoded and
    embedded in a base64 ``data:`` URL, so no file is written to disk.

    Args:
        df: The pandas DataFrame to export.

    Returns:
        The ``<a>`` element as a string, labelled "Download CSV File".
    """
    csv_text = df.to_csv(index=False)
    payload = base64.b64encode(csv_text.encode("utf-8")).decode("ascii")
    # "text/csv" is the registered media type for CSV (the previous
    # "file/csv" is not one); the charset matches the encoding used above.
    return (
        f'<a href="data:text/csv;charset=utf-8;base64,{payload}" '
        'download="extracted_data.csv">Download CSV File</a>'
    )

def data_extraction_page():
    """Render the Streamlit page that extracts text from an uploaded ID card.

    Shows a title and a file uploader restricted to jpg/jpeg/png. Once a file
    is uploaded, the image is displayed, passed to ``process_image``, and the
    result is shown via ``display_table`` followed by a CSV download link.
    Nothing further is rendered while no file has been uploaded.
    """
    st.title('ID Card Text Extraction')

    uploaded_file = st.file_uploader(
        "Upload an image of your ID card to Extract Data",
        type=["jpg", "jpeg", "png"],
    )
    if uploaded_file is None:
        return

    image = Image.open(uploaded_file)
    st.image(
        image,
        caption='Wait...! We Are Extracting Data For You',
        use_column_width=True,
    )

    extracted_data = process_image(image)
    display_table(extracted_data)

    # The export uses the raw values, so unread fields stay None here rather
    # than being blanked out.
    export_frame = pd.DataFrame(
        list(extracted_data.items()),
        columns=['Field', 'Value'],
    )
    download_link = get_csv_download_link(export_frame)
    # The link is raw HTML, so Streamlit must be told to render it as such.
    st.markdown(download_link, unsafe_allow_html=True)