Spaces:
Running
Running
import gradio as gr | |
from easyocr import Reader | |
from PIL import Image | |
import io | |
import json | |
import csv | |
import openai | |
import ast | |
import os | |
from deta import Deta | |
###################### | |
import requests | |
import json | |
import os | |
######################## | |
# Shared EasyOCR reader, created once at import time with the "tr" language list.
reader = Reader(["tr"])

# Token read from the HF_TOKEN environment variable; None when it is unset.
API_TOKEN = os.environ.get("HF_TOKEN")
def get_parsed_address(input_img):
    """Run OCR on an image and send the recognised text to ``query``.

    Args:
        input_img: Image handed unchanged to ``get_text``.

    Returns:
        Whatever ``query`` returns for the OCR text.
    """
    return query(get_text(input_img))
def preprocess_img(inp_image):
    """Convert an image to grayscale and invert it.

    NOTE(review): ``cv2`` is not imported in the visible part of this file,
    so calling this function raises ``NameError`` unless it is imported
    somewhere else.

    Args:
        inp_image: Image passed to ``cv2.cvtColor`` with ``COLOR_BGR2GRAY``
            (presumably a BGR array; confirm with callers).

    Returns:
        The bitwise-inverted grayscale image.
    """
    grayscale = cv2.cvtColor(inp_image, cv2.COLOR_BGR2GRAY)
    return cv2.bitwise_not(grayscale)
def get_text(input_img):
    """Extract text from an image with the module-level OCR ``reader``.

    Args:
        input_img: Image passed to ``reader.readtext``.

    Returns:
        The recognised text fragments joined by single spaces.
    """
    # detail=0 is requested so the result can be joined directly as strings.
    fragments = reader.readtext(input_img, detail=0)
    return " ".join(fragments)
def get_address(resp):
    """Collect NER entities into a dict keyed by entity group.

    Args:
        resp: Iterable of entity dicts, each with an ``"entity_group"`` and a
            ``"word"`` key. Items that are not dicts, that belong to an
            unknown group, or that carry no text are ignored, so a non-list
            response (for example an error dict) yields empty fields instead
            of raising.

    Returns:
        A dict with one string per known group, in a fixed key order. Words
        of entities sharing a group are joined with a single space; groups
        without any entity map to ``""``.
    """
    known_groups = (
        "il",
        "ilce",
        "Apartman/Site",
        "mahalle",
        "ad-soyad",
        "ic kapi no",
        "kat",
        "sokak",
    )
    words_by_group = {group: [] for group in known_groups}

    for item in resp or ():
        if not isinstance(item, dict):
            continue
        group = item.get("entity_group")
        word = item.get("word")
        # Only string group names can match; this also avoids hashing
        # unexpected (unhashable) values coming from a malformed response.
        if isinstance(group, str) and group in words_by_group and isinstance(word, str) and word:
            words_by_group[group].append(word)

    # Join with a space so repeated entities of one group stay readable
    # instead of being glued together.
    return {group: " ".join(words) for group, words in words_by_group.items()}
def _parse_entities(raw):
    """Return the list of entity dicts carried by *raw*, or ``[]`` if none."""
    if isinstance(raw, str):
        text = raw.strip()
        if not text:
            return []
        # The value arrives as text (presumably the textual form of the NER
        # response shown in the textbox). Try JSON first, then a Python
        # literal; literal_eval only accepts literals, so no code is executed.
        for parser in (json.loads, ast.literal_eval):
            try:
                raw = parser(text)
                break
            except (ValueError, SyntaxError, TypeError):
                continue
        else:
            return []
    if not isinstance(raw, list):
        return []
    return [item for item in raw if isinstance(item, dict)]


def text_dict(input):
    """Turn an NER response into the eight values shown in the address form.

    The response is parsed when it is given as text, grouped with
    ``get_address``, stored with ``write_db`` and then flattened.

    Args:
        input: NER response, either a list of entity dicts or its textual
            form (JSON or Python literal).

    Returns:
        A tuple of eight strings: il, ilce, mahalle, sokak, Apartman/Site,
        tel, ad-soyad, ic kapi no. Fields with no matching entity are ``""``.
        When the input holds no entities (empty text, unparsable text, or a
        non-list such as an error dict) eight empty strings are returned and
        nothing is written to the database.
    """
    print(input)
    entities = _parse_entities(input)
    if not entities:
        return ("",) * 8

    eval_result = get_address(entities)
    write_db(eval_result)

    # Use the keys get_address actually produces; .get keeps a missing group
    # (e.g. "tel", which get_address does not collect) from raising KeyError.
    field_keys = (
        "il",
        "ilce",
        "mahalle",
        "sokak",
        "Apartman/Site",
        "tel",
        "ad-soyad",
        "ic kapi no",
    )
    return tuple(str(eval_result.get(key, "")) for key in field_keys)
def save_csv(mahalle, il, sokak, apartman):
    """Append one address row to ``adress_book.csv`` and return it.

    Args:
        mahalle: First column of the row.
        il: Second column of the row.
        sokak: Third column of the row.
        apartman: Fourth column of the row.

    Returns:
        The written row as a list ``[mahalle, il, sokak, apartman]``.
    """
    adres_full = [mahalle, il, sokak, apartman]
    # newline="" lets the csv module control line endings itself; without it
    # every row is followed by a blank line on platforms that translate "\n".
    with open("adress_book.csv", "a", encoding="utf-8", newline="") as f:
        csv.writer(f).writerow(adres_full)
    return adres_full
def get_json(mahalle, il, sokak, apartman):
    """Serialise the four address parts as a pretty-printed JSON object.

    Returns:
        A JSON string with the keys ``mahalle``, ``il``, ``sokak`` and
        ``apartman`` in that order, indented by four spaces, with non-ASCII
        characters written as-is.
    """
    return json.dumps(
        dict(mahalle=mahalle, il=il, sokak=sokak, apartman=apartman),
        ensure_ascii=False,
        indent=4,
    )
def write_db(data_dict):
    """Insert *data_dict* into the "deprem-ocr" Deta Base.

    The project key is read from the ``DETA_KEY`` environment variable on
    every call.

    Args:
        data_dict: Record passed to ``Base.insert``.
    """
    project_key = os.getenv('DETA_KEY')
    base = Deta(project_key).Base("deprem-ocr")
    base.insert(data_dict)
import json | |
import requests | |
# Authorization header built from API_TOKEN; passed by query() on each request.
headers = dict(Authorization=f"Bearer {API_TOKEN}")

# Endpoint that query() posts to.
API_URL = "https://api-inference.huggingface.co/models/deprem-ml/deprem-ner"
def query(payload):
    """POST *payload* as JSON to ``API_URL`` and return the decoded reply.

    Args:
        payload: JSON-serialisable object sent as the request body.

    Returns:
        The response body parsed from UTF-8 JSON.

    Raises:
        requests.exceptions.Timeout: If the server does not connect or
            answer within the timeout below.
        ValueError: If the response body is not valid JSON.
    """
    body = json.dumps(payload)
    # requests waits forever by default; bound the wait so a stalled endpoint
    # cannot hang the handler. (connect, read) in seconds; the read limit is
    # generous to leave room for slow responses.
    response = requests.post(API_URL, headers=headers, data=body, timeout=(10, 120))
    return json.loads(response.content.decode("utf-8"))
def infer(ocr_input):
    """Wrap *ocr_input* in an ``{"inputs": ...}`` payload and send it via ``query``.

    Returns:
        Whatever ``query`` returns for that payload.
    """
    request_body = {"inputs": ocr_input}
    return query(request_body)
""" | |
ORNEK NER OUTPUT | |
[ | |
{ | |
"entity_group": "mahalle", | |
"score": 0.8160411715507507, | |
"word": "Akevler mahallesi", | |
"start": 14, | |
"end": 31 | |
}, | |
{ | |
"entity_group": "sokak", | |
"score": 0.940501868724823, | |
"word": "Rüzgar sokak", | |
"start": 32, | |
"end": 44 | |
}, | |
{ | |
"entity_group": "Apartman/Site", | |
"score": 0.8081040978431702, | |
"word": "Tuncay apartmanı", | |
"start": 45, | |
"end": 61 | |
}, | |
{ | |
"entity_group": "ilce", | |
"score": 0.854024350643158, | |
"word": "Antakya", | |
"start": 72, | |
"end": 79 | |
} | |
] | |
""" | |
# this is not in use due to content filter | |
def openai_response(ocr_input):
    """Build the extraction prompt for *ocr_input*; the request itself is disabled.

    Only the prompt string is constructed. The code that sent it and
    post-processed the answer is commented out below, so the function
    always returns ``None``.

    Args:
        ocr_input: Text interpolated into the prompt.

    Returns:
        None.
    """
    prompt = f"""Tabular Data Extraction You are a highly intelligent and accurate tabular data extractor from
plain text input and especially from emergency text that carries address information, your inputs can be text
of arbitrary size, but the output should be in [{{'tabular': {{'entity_type': 'entity'}} }}] JSON format Force it
to only extract keys that are shared as an example in the examples section, if a key value is not found in the
text input, then it should be ignored. Have only city, distinct, neighbourhood,
street, no, tel, name_surname, address Examples: Input: Deprem sırasında evimizde yer alan adresimiz: İstanbul,
Beşiktaş, Yıldız Mahallesi, Cumhuriyet Caddesi No: 35, cep telefonu numaram 5551231256, adim Ahmet Yilmaz
Output: {{'city': 'İstanbul', 'distinct': 'Beşiktaş', 'neighbourhood': 'Yıldız Mahallesi', 'street': 'Cumhuriyet Caddesi', 'no': '35', 'tel': '5551231256', 'name_surname': 'Ahmet Yılmaz', 'address': 'İstanbul, Beşiktaş, Yıldız Mahallesi, Cumhuriyet Caddesi No: 35'}}
Input: {ocr_input}
Output:
"""
    # Disabled request/post-processing code, kept for reference.
    # NOTE(review): it calls eval() on the model's reply; if this is ever
    # re-enabled, parse the reply with a literal/JSON parser instead.
    #
    # openai_client = OpenAI_API()
    # response = openai_client.single_request(ocr_input)
    # resp = response["choices"][0]["text"]
    # print(resp)
    # resp = eval(resp.replace("'{", "{").replace("}'", "}"))
    # resp["input"] = ocr_input
    # dict_keys = [
    #     'city',
    #     'distinct',
    #     'neighbourhood',
    #     'street',
    #     'no',
    #     'tel',
    #     'name_surname',
    #     'address',
    #     'input',
    # ]
    # for key in dict_keys:
    #     if key not in resp.keys():
    #         resp[key] = ''
    # return resp
    return None
# Gradio UI: an image/text input row, the raw parse result, and one textbox
# per address field.
# NOTE(review): the nesting of the layout blocks below is reconstructed; confirm
# it against the intended layout.
with gr.Blocks() as demo:
    gr.Markdown("\n# Enkaz Bildirme Uygulaması\n")
    gr.Markdown("Bu uygulamada ekran görüntüsü sürükleyip bırakarak AFAD'a enkaz bildirimi yapabilirsiniz. Mesajı metin olarak da girebilirsiniz, tam adresi ayrıştırıp döndürür. API olarak kullanmak isterseniz sayfanın en altında use via api'ya tıklayın.")

    with gr.Row():
        img_area = gr.Image(label="Ekran Görüntüsü yükleyin 👇")
        ocr_result = gr.Textbox(label="Metin yükleyin 👇 ")

    open_api_text = gr.Textbox(label="Tam Adres")
    # The caption of a Button is its value; a label keyword does not set the
    # text shown on the button.
    submit_button = gr.Button(value="Yükle")

    with gr.Column():
        with gr.Row():
            city = gr.Textbox(label="İl")
            distinct = gr.Textbox(label="İlçe")
        with gr.Row():
            neighbourhood = gr.Textbox(label="Mahalle")
            street = gr.Textbox(label="Sokak/Cadde/Bulvar")
        with gr.Row():
            tel = gr.Textbox(label="Telefon")
        with gr.Row():
            name_surname = gr.Textbox(label="İsim Soyisim")
            address = gr.Textbox(label="Adres")
        with gr.Row():
            no = gr.Textbox(label="Kapı No")

    # Image path: OCR + parse on button click, result goes to open_api_text.
    submit_button.click(get_parsed_address, inputs=img_area, outputs=open_api_text, api_name="upload_image")
    # Text path: every change of the text box is sent to query().
    ocr_result.change(query, ocr_result, open_api_text, api_name="upload-text")
    # Whenever the parse result changes, fan it out into the individual fields.
    open_api_text.change(text_dict, open_api_text, [city, distinct, neighbourhood, street, address, tel, name_surname, no])


if __name__ == "__main__":
    demo.launch()