import os

import cv2
import gradio as gr
import numpy as np
import pytesseract
from annif_client import AnnifClient


def get_annif_projects():
    """Initialize the Annif client and list the available projects."""
    try:
        annif = AnnifClient()
        projects = annif.projects
        if not projects:
            raise ValueError("No projects found from Annif client")
        proj_ids = [project["project_id"] for project in projects]
        proj_names = [project["name"] for project in projects]
        return annif, proj_ids, proj_names
    except Exception as e:
        print(f"Error initializing Annif client: {str(e)}")
        return None, [], []


annif, proj_ids, proj_names = get_annif_projects()


def process(image, project_num: int, lang: str = "eng"):
    """OCR the image with Tesseract and get subject suggestions from Annif."""
    try:
        if not proj_ids:
            raise ValueError("No Annif projects available")

        # Accept either a file path or a numpy array from the Gradio Image component.
        if isinstance(image, str):
            img = cv2.imread(image)
            if img is None:
                raise ValueError(f"Unable to read image from path: {image}")
        elif isinstance(image, np.ndarray):
            img = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        else:
            raise ValueError("Unsupported image type")

        # Binarize with Otsu's threshold before running OCR.
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        threshold_img = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
        text = pytesseract.image_to_string(threshold_img, lang=lang)

        # Clean up the temporary file if the input arrived as a path.
        if isinstance(image, str):
            os.remove(image)

        results = annif.suggest(project_id=proj_ids[project_num], text=text)
        sorted_results = sorted(results, key=lambda x: x["score"], reverse=True)

        # Build the suggestions list; each Annif suggestion carries 'uri', 'label'
        # and 'score' fields, and the score is shown as a <meter> bar.
        items = "".join(
            f'<li class="list-group-item">'
            f'<meter value="{result["score"]}"></meter>{result["label"]}</li>'
            for result in sorted_results
        )
        html_content = f"""
        <div id="suggestions-wrapper">
            <h2 id="suggestions">Suggested subjects</h2>
            <ul class="list-group">{items}</ul>
        </div>
        """
""" return text, html_content except Exception as e: return str(e), "" langs = ("eng", "fin", "swe") css = """ .gradio-container, .gradio-container *, body, .mygrclass { color: #343260 !important; background-color: #f3f3f6; color: #343260; font-family: Jost, sans-serif; font-weight: 400; font-size: 1rem; line-height: 1; } h1, h1 a { padding: 2rem 0; font-weight: 500; font-size: 2rem; text-align: center; } h2 { font-weight: 500; font-size: 1.2rem; padding: 0.5rem 0; } #get-suggestions { margin: 2rem 0 0 0; background: #6280dc; color: white !important; border: none; border-radius: 0px; } #suggestions-wrapper { background-color: #f3f3f6; padding: 1rem; } #suggestions { border-top: 1px solid #343260; padding-top: 0.5rem; text-transform: uppercase; font-size: 1.1rem; } .list-group-item { display: flex; align-items: center; padding: 1px 0; border-bottom: 1px solid #e0e0e0; } meter { width: 24px; margin-right: 10px; } meter:-moz-meter-optimum::-moz-meter-bar { background: #6280dc; } meter::-webkit-meter-bar { border: none; border-radius: 0; height: 18px; background-color: #ccc; box-shadow: 0 12px 3px -5px #e6e6e6 inset; margin: 2 rem; } meter::-webkit-meter-optimum-value { background: #6280dc; } """ with gr.Blocks(theme=gr.themes.Default(radius_size="none"), css=css) as demo: gr.HTML("""

        <h1>Annif demo with image/camera input and OCR</h1>

""") with gr.Row(): with gr.Column(scale=3): image_input = gr.Image(type="numpy", label="Input Image", elem_classes="mygrclass") with gr.Column(scale=1): project = gr.Dropdown(choices=proj_names, label="Project (vocabulary and language)", type="index", elem_classes="mygrclass", value=proj_names[2]) lang = gr.Dropdown(choices=langs, label="Select Language for OCR", type="value", value="eng", elem_classes="mygrclass") submit_btn = gr.Button("Get text & suggestions", elem_id="get-suggestions", elem_classes="mygrclass") with gr.Row(): with gr.Column(scale=3): text_output = gr.Textbox(label="Extracted Text", elem_classes="mygrclass") with gr.Column(scale=1): html_output = gr.HTML() submit_btn.click(process, inputs=[image_input, project, lang], outputs=[text_output, html_output]) demo.launch()