# Spaces: Sleeping
import html
import os

import cv2
import gradio as gr
import numpy as np
import pytesseract
from annif_client import AnnifClient
def get_annif_projects():
    """Create an Annif client and collect the ids and names of its projects.

    Returns:
        A ``(client, project_ids, project_names)`` tuple. If anything goes
        wrong (client creation fails, the project list is empty, or an entry
        lacks an expected key) the error is printed and ``(None, [], [])``
        is returned instead of raising.
    """
    try:
        client = AnnifClient()
        available = client.projects
        if not available:
            raise ValueError("No projects found from Annif client")
        ids = [entry["project_id"] for entry in available]
        names = [entry["name"] for entry in available]
    except Exception as e:
        # Best-effort start-up: report the problem and hand back empty
        # placeholders so the caller can still unpack three values.
        print(f"Error initializing Annif client: {str(e)}")
        return None, [], []
    return client, ids, names


# Module-level state shared by the request handler and the UI definition.
annif, proj_ids, proj_names = get_annif_projects()
def _to_bgr_image(image):
    """Return *image* as a BGR array, reading it from disk when given a path.

    Raises:
        ValueError: if a path cannot be read by OpenCV, or if *image* is
            neither a ``str`` path nor a ``numpy.ndarray``.
    """
    if isinstance(image, str):
        img = cv2.imread(image)
        if img is None:
            raise ValueError(f"Unable to read image from path: {image}")
        return img
    if isinstance(image, np.ndarray):
        # Array input is treated as RGB and converted to OpenCV's BGR order.
        return cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    raise ValueError("Unsupported image type")


def _render_suggestions_html(results):
    """Render suggestion dicts (``score``, ``uri``, ``label``) as an HTML list.

    Items are ordered by descending score. Every interpolated value is
    HTML-escaped because it originates from a remote service response.
    """
    ranked = sorted(results, key=lambda item: item["score"], reverse=True)
    items = [
        f"""
        <li class="list-group-item">
        <meter value="{html.escape(str(item['score']))}" min="0" max="1"></meter>
        <a href="{html.escape(str(item['uri']))}">{html.escape(str(item['label']))}</a>
        </li>
        """
        for item in ranked
    ]
    header = """
    <div id="suggestions-wrapper">
    <h2 id="suggestions">Suggested subjects</h2>
    <ul class="list-group" id="results">
    """
    footer = """
    </ul>
    </div>
    """
    return header + "".join(items) + footer


def process(image, project_num: int, lang: str = "eng"):
    """OCR an image and ask Annif for subject suggestions on the extracted text.

    Args:
        image: Either a file path (``str``) or an image array
            (``numpy.ndarray``). ``None`` is reported as a missing image.
        project_num: Index into the module-level ``proj_ids`` list. ``None``
            (no selection) is reported as an error.
        lang: Language code passed to Tesseract.

    Returns:
        ``(text, html)``: the OCR text and an HTML fragment listing the
        suggestions, highest score first. On any failure the function does
        not raise; it returns ``(error_message, "")`` so the message shows
        up in the text output.

    Side effects:
        When *image* is a file path, the file is deleted after OCR succeeds.
    """
    try:
        # Validate the caller-supplied arguments before doing any work.
        if image is None:
            raise ValueError("No image provided")
        if not proj_ids:
            raise ValueError("No Annif projects available")
        if project_num is None:
            raise ValueError("No Annif project selected")

        img = _to_bgr_image(image)

        # Otsu binarisation on the grayscale image before running OCR.
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        threshold_img = cv2.threshold(
            gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
        )[1]
        text = pytesseract.image_to_string(threshold_img, lang=lang)

        if isinstance(image, str):
            os.remove(image)

        results = annif.suggest(project_id=proj_ids[project_num], text=text)
        return text, _render_suggestions_html(results)
    except Exception as e:
        # UI boundary: surface the problem to the user instead of crashing.
        return str(e), ""
# OCR language codes offered in the UI and forwarded to Tesseract as `lang`.
langs = ("eng", "fin", "swe")

# Custom stylesheet passed to gr.Blocks(css=...).
# Two dead declarations from the earlier version were removed without
# changing the rendered result:
#   * a second `color: #343260;` in the first rule, already covered by the
#     `!important` declaration above it;
#   * `margin: 2 rem;` on `meter::-webkit-meter-bar`, which is invalid CSS
#     (space between number and unit) and was therefore ignored by browsers.
css = """
.gradio-container, .gradio-container *, body, .mygrclass {
    color: #343260 !important;
    background-color: #f3f3f6;
    font-family: Jost, sans-serif;
    font-weight: 400;
    font-size: 1rem;
    line-height: 1;
}
h1, h1 a {
    padding: 2rem 0;
    font-weight: 500;
    font-size: 2rem;
    text-align: center;
}
h2 {
    font-weight: 500;
    font-size: 1.2rem;
    padding: 0.5rem 0;
}
#get-suggestions {
    margin: 2rem 0 0 0;
    background: #6280dc;
    color: white !important;
    border: none;
    border-radius: 0px;
}
#suggestions-wrapper {
    background-color: #f3f3f6;
    padding: 1rem;
}
#suggestions {
    border-top: 1px solid #343260;
    padding-top: 0.5rem;
    text-transform: uppercase;
    font-size: 1.1rem;
}
.list-group-item {
    display: flex;
    align-items: center;
    padding: 1px 0;
    border-bottom: 1px solid #e0e0e0;
}
meter {
    width: 24px;
    margin-right: 10px;
}
meter:-moz-meter-optimum::-moz-meter-bar {
    background: #6280dc;
}
meter::-webkit-meter-bar {
    border: none;
    border-radius: 0;
    height: 18px;
    background-color: #ccc;
    box-shadow: 0 12px 3px -5px #e6e6e6 inset;
}
meter::-webkit-meter-optimum-value {
    background: #6280dc;
}
"""
# Preselect the third project when it exists; otherwise fall back to the
# first one, or to no selection when the project list is empty. Indexing
# proj_names[2] unconditionally would raise IndexError at start-up whenever
# fewer than three projects were loaded.
if len(proj_names) > 2:
    default_project = proj_names[2]
elif proj_names:
    default_project = proj_names[0]
else:
    default_project = None

with gr.Blocks(theme=gr.themes.Default(radius_size="none"), css=css) as demo:
    gr.HTML("""
    <h1><a href="https://annif.org">Annif</a> demo with image/camera input and OCR</h1>
    """)

    # Top row: image input on the left, project/language selectors and the
    # submit button on the right.
    with gr.Row():
        with gr.Column(scale=3):
            image_input = gr.Image(
                type="numpy",
                label="Input Image",
                elem_classes="mygrclass",
            )
        with gr.Column(scale=1):
            # type="index" makes the handler receive the position of the
            # chosen name, which it uses to index the project id list.
            project = gr.Dropdown(
                choices=proj_names,
                label="Project (vocabulary and language)",
                type="index",
                elem_classes="mygrclass",
                value=default_project,
            )
            lang = gr.Dropdown(
                choices=langs,
                label="Select Language for OCR",
                type="value",
                value="eng",
                elem_classes="mygrclass",
            )
            submit_btn = gr.Button(
                "Get text & suggestions",
                elem_id="get-suggestions",
                elem_classes="mygrclass",
            )

    # Bottom row: extracted text on the left, suggestion list on the right.
    with gr.Row():
        with gr.Column(scale=3):
            text_output = gr.Textbox(label="Extracted Text", elem_classes="mygrclass")
        with gr.Column(scale=1):
            html_output = gr.HTML()

    submit_btn.click(
        process,
        inputs=[image_input, project, lang],
        outputs=[text_output, html_output],
    )

demo.launch()