gregoiregllt committed on
Commit
b65ff94
·
1 Parent(s): 0ebbd8e
Files changed (2) hide show
  1. app.py +21 -21
  2. utils.py +153 -0
app.py CHANGED
@@ -1,30 +1,13 @@
1
- from fastapi import FastAPI, File, Form, UploadFile, Body
2
  from fastapi.responses import JSONResponse, Response
3
- from concrete.ml.deployment import FHEModelServer
 
 
4
  import numpy as np
5
  from pydantic import BaseModel
6
 
7
-
8
- from concrete.ml.deployment import FHEModelClient
9
  import subprocess
10
  from pathlib import Path
11
-
12
- from utils import (
13
- CLIENT_DIR,
14
- CURRENT_DIR,
15
- DEPLOYMENT_DIR,
16
- SERVER_DIR,
17
- INPUT_BROWSER_LIMIT,
18
- KEYS_DIR,
19
- SERVER_URL,
20
- TARGET_COLUMNS,
21
- TRAINING_FILENAME,
22
- clean_directory,
23
- get_disease_name,
24
- load_data,
25
- pretty_print,
26
- )
27
-
28
  import time
29
  from typing import List
30
 
@@ -37,6 +20,23 @@ class Symptoms(BaseModel):
37
 
38
  app = FastAPI()
39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  @app.get("/")
41
  def greet_json():
42
  return {"Hello": "World!"}
 
1
+ from fastapi import FastAPI, File, Form, UploadFile, HTTPException
2
  from fastapi.responses import JSONResponse, Response
3
+
4
+ from utils import extract_json_from_images
5
+
6
  import numpy as np
7
  from pydantic import BaseModel
8
 
 
 
9
  import subprocess
10
  from pathlib import Path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  import time
12
  from typing import List
13
 
 
20
 
21
  app = FastAPI()
22
 
23
@app.post("/extract-json")
async def extract_json(files: List[UploadFile] = File(...)):
    """Extract structured JSON from one or more uploaded report images.

    Each upload is copied into an in-memory buffer whose ``name`` attribute is
    set to the client-supplied file name, then the buffers are handed to
    ``extract_json_from_images``.

    Args:
        files: Image files sent as multipart form data.

    Returns:
        A ``JSONResponse`` wrapping the value returned by
        ``extract_json_from_images``.

    Raises:
        HTTPException: With status 500 and the error text as detail when
            reading or extraction fails.
    """
    from io import BytesIO  # local import: only this handler needs it

    try:
        images = []
        for upload in files:
            buffer = BytesIO(await upload.read())
            # The temp file behind ``upload.file`` does not expose the client
            # file name through ``.name``, so carry it over explicitly for
            # code that derives the document type from the name.
            buffer.name = upload.filename or ""
            images.append(buffer)

        json_data = extract_json_from_images(images)
        return JSONResponse(content=json_data)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
    finally:
        # Release the uploads on every path, not only on success.
        for upload in files:
            upload.file.close()
39
+
40
  @app.get("/")
41
  def greet_json():
42
  return {"Hello": "World!"}
utils.py ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import requests
3
+ from PIL import Image
4
+ import pytesseract
5
+ import os
6
+ from langchain_huggingface import HuggingFaceEndpoint
7
+ from langchain.chains import LLMChain
8
+ from langchain_core.prompts import PromptTemplate
9
+ import re
10
+ import json
11
+
12
# Hugging Face access token, read from the "HFBearer" environment variable.
api_key = os.environ.get("HFBearer")
# os.environ only accepts str values: assigning None raises TypeError at
# import time, so the token is exported only when it is actually set.
if api_key is not None:
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = api_key

# URL of the Hugging Face inference endpoint
API_URL = "https://pllfc7e5i0rujahy.us-east-1.aws.endpoints.huggingface.cloud"
17
+
18
+ # Function to extract text from image
19
def extract_text_from_image(image):
    """Return the text that ``pytesseract.image_to_string`` reads from ``image``."""
    return pytesseract.image_to_string(image)
22
+
23
+ # Function to extract JSON from text
24
def extract_json(text):
    """Decode the JSON payload enclosed in ``<JSON>``/``</JSON>`` tags.

    Only the first tagged section of ``text`` is considered.

    Returns:
        The decoded JSON value on success; the string "Aucun JSON trouvé"
        when no tagged section exists; the string "Erreur de décodage JSON"
        when the tagged content is not valid JSON.
    """
    tagged = re.search(r'<JSON>\s*(.*?)\s*</JSON>', text, re.DOTALL)
    if tagged is None:
        return "Aucun JSON trouvé"

    payload = tagged.group(1)
    try:
        return json.loads(payload)
    except json.JSONDecodeError:
        return "Erreur de décodage JSON"
38
+
39
+ # Function to get metadata title from image
40
def get_image_metadata(image):
    """Return the title of ``image``: its ``name`` up to the first ``.``.

    A name without any dot is returned unchanged.
    """
    title, _, _ = image.name.partition('.')
    return title
44
+
45
def count_tokens(text):
    """Return the number of whitespace-separated words in ``text``."""
    return sum(1 for _ in text.split())
47
+
48
# Comma-separated parameter names to request from the model, keyed by the
# document title (the uploaded file name without its extension).
image_params = {
    "bilan-atherosclerose": "medecin_responsable, rythme_sinusal, valeur_EIM, score_calcique",
    "bilan-medical": "medecin_responsable, date_naissance, prenom, nom, identifiant_patient, nom_medecin",
    # Fixed: the original value carried a stray ")" after
    # ECG_repos_valeur_par_minute, which was sent to the model verbatim.
    "ECG": "medecin_responsable, poids, taille, ECG_repos_valeur_par_minute, valeur_FMT, valeur_niveau_atteint, valeur_diminution_frequence_cardiaque_bpm",
    "echo-doppler": "medecin_responsable, sous_clavieres, vertebrales, carotides",
    # Fixed: "score calcique" now uses the same snake_case spelling as in
    # "bilan-atherosclerose".
    "echographie-poumons": "medecin_responsable, score_calcique, technique, resultats",
    "echotomographie-abdominale": "medecin_responsable, foie, vesicule, pancreas, reins, rate, aorte_abdominale, conclusion",
    "echotomographie-cardiaque": "medecin_responsable, taille, poids, surface_corporelle, conclusion",
    "echotomographie-prostate": "medecin_responsable, vessie, ureteres, prostate, conclusion",
    "hematologie": "medecin_responsable, leucocytes, hematies, hemoglobines, hematocrite",
}
59
+
60
# Prompt template (French) instructing the model to return the requested
# parameters as a JSON object written between <JSON> and </JSON>.
# {parameters} and {texte} are placeholders filled in at invocation time; the
# doubled braces in the example are presumably escapes for literal braces in
# the template format — confirm against the prompt-template library.
user_input = """
Vous allez extraire des paramètres d'un texte à l'intérieur d'un objet JSON, écrit entre <JSON> et </JSON>.
Liste des paramètres : {parameters}

Voici un exemple de réponse valide :
<JSON>
{{"date_naissance": "", "prenom": "", "nom": ""}}
</JSON>

Voici le texte à partir duquel vous devez extraire les paramètres :
{texte}
"""
72
+
73
+ # prompt = PromptTemplate.from_template(user_input)
74
+
75
# LLM client bound to the inference endpoint at API_URL. It is a module-level
# statement, so it is constructed once when this module is imported.
llm = HuggingFaceEndpoint(
    endpoint_url=API_URL,
)
78
+
79
+ # llm_chain = prompt | llm
80
+
81
+ # # File uploader for multiple images
82
+ # uploaded_images = st.file_uploader("Upload images", type=["png", "jpg", "jpeg"], accept_multiple_files=True)
83
+
84
+ # # Modify the Streamlit section to extract the JSON for multiple images
85
+ # if st.button("Submit"):
86
+ # if uploaded_images:
87
+ # all_json_data = {} # Dictionary to store JSON data for each image
88
+ # for uploaded_image in uploaded_images:
89
+ # with st.spinner(f"Extracting text from image: {uploaded_image.name}..."):
90
+ # image = Image.open(uploaded_image)
91
+ # extracted_text = extract_text_from_image(image)
92
+
93
+ # max_text_length = 500 # Adjust as needed to keep total tokens under 1024
94
+ # if count_tokens(extracted_text) > max_text_length:
95
+ # extracted_text = " ".join(extracted_text.split()[:max_text_length])
96
+
97
+ # with st.spinner(f"Fetching response from API for {uploaded_image.name}..."):
98
+ # # Get metadata title from the image
99
+ # title = get_image_metadata(uploaded_image)
100
+ # parameters = image_params[title]
101
+ # output = llm_chain.invoke({"texte": extracted_text, "parameters": parameters})
102
+ # st.success(f"Response received for {uploaded_image.name}!")
103
+
104
+ # # Extract JSON from the API output
105
+ # json_data = extract_json(output) # Extract JSON from the API output
106
+ # all_json_data[title] = json_data # Store JSON data with title as key
107
+ # st.write(title, json_data)
108
+
109
+ # # Display all extracted JSON data
110
+ # st.write("Extracted JSON Data for all images.")
111
+ # else:
112
+ # st.warning("Please upload at least one image to extract text.")
113
+
114
+
115
+
116
def extract_json_from_images(uploaded_images, max_text_length=500):
    """Run OCR on each image and ask the LLM to extract its parameters as JSON.

    For every image the document title is derived with ``get_image_metadata``,
    the matching parameter list is looked up in ``image_params``, the OCR text
    is truncated to ``max_text_length`` words, and the module-level
    ``user_input`` prompt is sent to ``llm``. The model output is parsed with
    ``extract_json``.

    Args:
        uploaded_images: Iterable of file-like objects that ``Image.open``
            can read and that expose a ``name`` attribute.
        max_text_length: Maximum number of whitespace-separated words of OCR
            text sent to the model (the default keeps the prompt short enough
            for the model's token limit, per the original comment).

    Returns:
        A dict mapping each document title to the value returned by
        ``extract_json``. Images sharing a title overwrite one another.

    Raises:
        KeyError: If an image's title has no entry in ``image_params``.
    """
    # The prompt and chain do not depend on the image, so build them once
    # instead of on every iteration.
    llm_chain = PromptTemplate.from_template(user_input) | llm

    all_json_data = {}  # Dictionary to store JSON data for each image

    for uploaded_image in uploaded_images:
        # Resolve the document type first so an unsupported file fails fast,
        # before the comparatively expensive OCR step.
        title = get_image_metadata(uploaded_image)
        try:
            parameters = image_params[title]
        except KeyError:
            raise KeyError(f"Unsupported document type: {title!r}") from None

        # Context manager lets Pillow release what it holds for the image
        # once the OCR text has been read.
        with Image.open(uploaded_image) as image:
            extracted_text = extract_text_from_image(image)

        # Split once and reuse the word list for both the check and the cut.
        words = extracted_text.split()
        if len(words) > max_text_length:
            extracted_text = " ".join(words[:max_text_length])

        output = llm_chain.invoke({"texte": extracted_text, "parameters": parameters})

        # Extract JSON from the API output and store it with title as key
        all_json_data[title] = extract_json(output)

    return all_json_data