pierreguillou committed on
Commit
00654a0
·
verified ·
1 Parent(s): 8d2a9d3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -3
app.py CHANGED
@@ -147,7 +147,7 @@ def create_prompt(extracted_text: str) -> str:
147
 
148
  return prompt
149
 
150
- def extract_data_with_gemini(text_file_path: str) -> dict:
151
  try:
152
  # Initialize Gemini
153
  model = initialize_gemini()
@@ -157,7 +157,7 @@ def extract_data_with_gemini(text_file_path: str) -> dict:
157
  extracted_text = f.read()
158
 
159
  # Create prompt and get response
160
- prompt = create_prompt(extracted_text)
161
  response = model.generate_content(prompt)
162
 
163
  # Parse the JSON response
@@ -178,6 +178,7 @@ def extract_data_with_gemini(text_file_path: str) -> dict:
178
 
179
  # Main Processing Function
180
  def process_pdf(pdf_file):
 
181
  temp_dir = os.path.join(os.getcwd(), "temp_processing")
182
  output_dir = os.path.join(temp_dir, 'output_images')
183
 
@@ -185,6 +186,9 @@ def process_pdf(pdf_file):
185
  shutil.rmtree(temp_dir)
186
  os.makedirs(output_dir, exist_ok=True)
187
 
 
 
 
188
  try:
189
  # Convert PDF to images and process
190
  images = convert_from_path(pdf_file.name)
@@ -206,7 +210,7 @@ def process_pdf(pdf_file):
206
  text_file_path = os.path.join(output_dir, 'extracted_text.txt')
207
 
208
  # Process with Gemini
209
- extracted_data = extract_data_with_gemini(text_file_path)
210
 
211
  # Save extracted data to JSON file
212
  json_path = os.path.join(temp_dir, "extracted_data.json")
 
147
 
148
  return prompt
149
 
150
+ def extract_data_with_gemini(text_file_path: str, path_to_data_to_extract: str) -> dict:
151
  try:
152
  # Initialize Gemini
153
  model = initialize_gemini()
 
157
  extracted_text = f.read()
158
 
159
  # Create prompt and get response
160
+ prompt = create_prompt(extracted_text, path_to_data_to_extract)
161
  response = model.generate_content(prompt)
162
 
163
  # Parse the JSON response
 
178
 
179
  # Main Processing Function
180
  def process_pdf(pdf_file):
181
+ template_dir = os.path.join(os.getcwd(), "templates")
182
  temp_dir = os.path.join(os.getcwd(), "temp_processing")
183
  output_dir = os.path.join(temp_dir, 'output_images')
184
 
 
186
  shutil.rmtree(temp_dir)
187
  os.makedirs(output_dir, exist_ok=True)
188
 
189
+ ## JSON of the data to extract with descriptions
190
+ path_to_data_to_extract = os.path.join(template_dir, "data_to_extract.json")
191
+
192
  try:
193
  # Convert PDF to images and process
194
  images = convert_from_path(pdf_file.name)
 
210
  text_file_path = os.path.join(output_dir, 'extracted_text.txt')
211
 
212
  # Process with Gemini
213
+ extracted_data = extract_data_with_gemini(text_file_path, path_to_data_to_extract)
214
 
215
  # Save extracted data to JSON file
216
  json_path = os.path.join(temp_dir, "extracted_data.json")