Spaces:

ppaihack
/

ZamaClinik

Sleeping

App Files Files Community

gregoiregllt committed on Sep 28, 2024

Commit

b65ff94

1 Parent(s): 0ebbd8e

go

Browse files

Files changed (2) hide show

app.py +21 -21
utils.py +153 -0

app.py CHANGED Viewed

@@ -1,30 +1,13 @@
-from fastapi import FastAPI, File, Form, UploadFile, Body
 from fastapi.responses import JSONResponse, Response
-from concrete.ml.deployment import FHEModelServer
 import numpy as np
 from pydantic import BaseModel
-from concrete.ml.deployment import FHEModelClient
 import subprocess
 from pathlib import Path
-from utils import (
-    CLIENT_DIR,
-    CURRENT_DIR,
-    DEPLOYMENT_DIR,
-    SERVER_DIR,
-    INPUT_BROWSER_LIMIT,
-    KEYS_DIR,
-    SERVER_URL,
-    TARGET_COLUMNS,
-    TRAINING_FILENAME,
-    clean_directory,
-    get_disease_name,
-    load_data,
-    pretty_print,
-)
 import time
 from typing import List
@@ -37,6 +20,23 @@ class Symptoms(BaseModel):
 app = FastAPI()
 @app.get("/")
 def greet_json():
     """Return the fixed greeting payload served on the root route."""
     greeting = {"Hello": "World!"}
     return greeting

+from fastapi import FastAPI, File, Form, UploadFile, HTTPException
 from fastapi.responses import JSONResponse, Response
+from utils import extract_json_from_images
 import numpy as np
 from pydantic import BaseModel
 import subprocess
 from pathlib import Path
 import time
 from typing import List
 app = FastAPI()
+@app.post("/extract-json")
+async def extract_json(files: List[UploadFile] = File(...)):
+    """Extract structured JSON from one or more uploaded document images.
+
+    Each upload is copied into an in-memory buffer that carries the original
+    file name, because ``extract_json_from_images`` derives the document type
+    from the ``name`` attribute of each file object.
+
+    Args:
+        files: Uploaded image files (multipart form field ``files``).
+
+    Returns:
+        A JSON response with whatever ``extract_json_from_images`` returns.
+
+    Raises:
+        HTTPException: 500 with the error text if reading or extraction fails.
+    """
+    import io  # local import: only this handler needs it
+    uploaded_images = []
+    try:
+        for file in files:
+            # UploadFile.file is a temporary file whose ``name`` is not the
+            # uploaded file name, so wrap the bytes in a named buffer instead.
+            buffer = io.BytesIO(await file.read())
+            buffer.name = file.filename or ""
+            uploaded_images.append(buffer)
+        # NOTE(review): extract_json_from_images is a synchronous call, so it
+        # blocks the event loop while it runs; consider a thread pool.
+        json_data = extract_json_from_images(uploaded_images)
+        return JSONResponse(content=json_data)
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e)) from e
+    finally:
+        # Release buffers and the underlying uploads on success and on failure.
+        for buffer in uploaded_images:
+            buffer.close()
+        for file in files:
+            file.file.close()
 @app.get("/")
 def greet_json():
     """Return the fixed greeting payload served on the root route."""
     greeting = {"Hello": "World!"}
     return greeting

utils.py ADDED Viewed

	@@ -0,0 +1,153 @@

+import streamlit as st
+import requests
+from PIL import Image
+import pytesseract
+import os
+from langchain_huggingface import HuggingFaceEndpoint
+from langchain.chains import LLMChain
+from langchain_core.prompts import PromptTemplate
+import re
+import json
+# Hugging Face access token, read from the "HFBearer" environment variable.
+api_key = os.environ.get("HFBearer")
+# os.environ only accepts str values: assigning None raises TypeError at
+# import time, so export the token only when it is actually configured.
+if api_key is not None:
+    os.environ["HUGGINGFACEHUB_API_TOKEN"] = api_key
+# URL of the Hugging Face inference endpoint used by the LLM client
+API_URL = "https://pllfc7e5i0rujahy.us-east-1.aws.endpoints.huggingface.cloud"
+def extract_text_from_image(image):
+    """Return the text that pytesseract's OCR reads from ``image``."""
+    return pytesseract.image_to_string(image)
+def extract_json(text):
+    """Parse the first JSON payload wrapped in <JSON>...</JSON> tags.
+
+    Returns the decoded JSON value on success. On failure an error string is
+    returned instead of raising: "Aucun JSON trouvé" when no tagged section
+    exists, "Erreur de décodage JSON" when the section is not valid JSON.
+    """
+    found = re.search(r'<JSON>\s*(.*?)\s*</JSON>', text, re.DOTALL)
+    if found is None:
+        return "Aucun JSON trouvé"
+    payload = found.group(1)
+    try:
+        return json.loads(payload)
+    except json.JSONDecodeError:
+        return "Erreur de décodage JSON"
+def get_image_metadata(image):
+    """Return the title of ``image``: its file name up to the first dot.
+
+    ``image`` must expose a ``name`` attribute holding a file name or path.
+    Directory components are dropped first, so a path such as
+    "./scans/ECG.png" yields "ECG" rather than an empty string.
+    """
+    # You can customize this function to extract other metadata as needed
+    file_name = os.path.basename(image.name)
+    title = file_name.split('.')[0]
+    return title
+def count_tokens(text):
+    """Return the number of whitespace-separated words in ``text``."""
+    words = text.split()
+    return len(words)
+# Maps a document type (the image title used as lookup key) to the
+# comma-separated list of field names inserted into the extraction prompt.
+# Field names are snake_case identifiers with no stray punctuation.
+image_params = {
+    "bilan-atherosclerose": "medecin_responsable, rythme_sinusal, valeur_EIM, score_calcique",
+    "bilan-medical": "medecin_responsable, date_naissance, prenom, nom, identifiant_patient, nom_medecin",
+    "ECG": "medecin_responsable, poids, taille, ECG_repos_valeur_par_minute, valeur_FMT, valeur_niveau_atteint, valeur_diminution_frequence_cardiaque_bpm",
+    "echo-doppler": "medecin_responsable, sous_clavieres, vertebrales, carotides",
+    "echographie-poumons": "medecin_responsable, score_calcique, technique, resultats",
+    "echotomographie-abdominale": "medecin_responsable, foie, vesicule, pancreas, reins, rate, aorte_abdominale, conclusion",
+    "echotomographie-cardiaque": "medecin_responsable, taille, poids, surface_corporelle, conclusion",
+    "echotomographie-prostate": "medecin_responsable, vessie, ureteres, prostate, conclusion",
+    "hematologie": "medecin_responsable, leucocytes, hematies, hemoglobines, hematocrite",
+}
+# Prompt template (in French) asking the model to return the listed parameters
+# as a JSON object written between <JSON> and </JSON>. {parameters} and {texte}
+# are the placeholders to fill in; the doubled braces in the example are
+# presumably escaped literal braces for the template engine — TODO confirm.
+# NOTE(review): extract_json_from_images defines its own local `user_input`,
+# so this module-level copy looks unused by the live code — verify.
+user_input = """
+Vous allez extraire des paramètres d'un texte à l'intérieur d'un objet JSON, écrit entre <JSON> et </JSON>.
+Liste des paramètres : {parameters}
+Voici un exemple de réponse valide :
+<JSON>
+{{"date_naissance": "", "prenom": "", "nom": ""}}
+</JSON>
+Voici le texte à partir duquel vous devez extraire les paramètres :
+{texte}
+"""
+# prompt = PromptTemplate.from_template(user_input)
+# LLM client bound to API_URL; created once when the module is imported.
+llm = HuggingFaceEndpoint(endpoint_url=API_URL)
+# llm_chain = prompt | llm
+# # File uploader for multiple images
+# uploaded_images = st.file_uploader("Upload images", type=["png", "jpg", "jpeg"], accept_multiple_files=True)
+# # Modify the Streamlit section to extract the JSON for multiple images
+# if st.button("Submit"):
+#     if uploaded_images:
+#         all_json_data = {}  # Dictionary to store JSON data for each image
+#         for uploaded_image in uploaded_images:
+#             with st.spinner(f"Extracting text from image: {uploaded_image.name}..."):
+#                 image = Image.open(uploaded_image)
+#                 extracted_text = extract_text_from_image(image)
+#                 max_text_length = 500  # Adjust as needed to keep total tokens under 1024
+#                 if count_tokens(extracted_text) > max_text_length:
+#                     extracted_text = " ".join(extracted_text.split()[:max_text_length])
+#                 with st.spinner(f"Fetching response from API for {uploaded_image.name}..."):
+#                     # Get metadata title from the image
+#                     title = get_image_metadata(uploaded_image)
+#                     parameters = image_params[title]
+#                     output = llm_chain.invoke({"texte": extracted_text, "parameters": parameters})
+#                     st.success(f"Response received for {uploaded_image.name}!")
+#                     # Extract JSON from the API output
+#                     json_data = extract_json(output)  # Extract JSON from the API output
+#                     all_json_data[title] = json_data  # Store JSON data with title as key
+#                     st.write(title, json_data)
+#         # Display all extracted JSON data
+#         st.write("Extracted JSON Data for all images.")
+#     else:
+#         st.warning("Please upload at least one image to extract text.")
+def extract_json_from_images(uploaded_images):
+    """OCR each image, ask the LLM for its fields and collect the JSON results.
+
+    Args:
+        uploaded_images: Iterable of file objects. Each must be readable by
+            ``Image.open`` and expose a ``name`` from which the document type
+            (a key of ``image_params``) is derived.
+
+    Returns:
+        Dict mapping each image title to the value returned by
+        ``extract_json`` for the LLM output (parsed JSON or an error string).
+        Images sharing a title overwrite each other; the last one wins.
+
+    Raises:
+        KeyError: If an image title is not a key of ``image_params``.
+    """
+    max_text_length = 500  # Adjust as needed to keep total tokens under 1024
+    # The prompt and chain do not depend on the image, so build them once.
+    # The leading whitespace inside the literal is part of the prompt text and
+    # is kept exactly as before so the model input is unchanged.
+    prompt_text = """
+        Vous allez extraire des paramètres d'un texte à l'intérieur d'un objet JSON, écrit entre <JSON> et </JSON>.
+        Liste des paramètres : {parameters}
+        Voici un exemple de réponse valide :
+        <JSON>
+        {{"date_naissance": "", "prenom": "", "nom": ""}}
+        </JSON>
+        Voici le texte à partir duquel vous devez extraire les paramètres :
+        {texte}
+        """
+    prompt = PromptTemplate.from_template(prompt_text)
+    llm_chain = prompt | llm
+    all_json_data = {}  # Dictionary to store JSON data for each image
+    for uploaded_image in uploaded_images:
+        # Resolve the document type first so an unsupported file fails fast,
+        # before any OCR work or LLM call is done for it.
+        title = get_image_metadata(uploaded_image)
+        if title not in image_params:
+            supported = ", ".join(sorted(image_params))
+            raise KeyError(
+                f"Unsupported document type {title!r}; expected one of: {supported}"
+            )
+        parameters = image_params[title]
+        image = Image.open(uploaded_image)
+        extracted_text = extract_text_from_image(image)
+        # Truncate to the first max_text_length whitespace-separated words.
+        words = extracted_text.split()
+        if len(words) > max_text_length:
+            extracted_text = " ".join(words[:max_text_length])
+        output = llm_chain.invoke({"texte": extracted_text, "parameters": parameters})
+        all_json_data[title] = extract_json(output)  # Store JSON data with title as key
+    return all_json_data