Spaces:

pierreguillou
/

arquiteturia

Sleeping

pierreguillou committed on Dec 1, 2024

Commit

00654a0

verified ·

1 Parent(s): 8d2a9d3

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -147,7 +147,7 @@ def create_prompt(extracted_text: str) -> str:
     return prompt
-def extract_data_with_gemini(text_file_path: str) -> dict:
     try:
         # Initialize Gemini
         model = initialize_gemini()
@@ -157,7 +157,7 @@ def extract_data_with_gemini(text_file_path: str) -> dict:
             extracted_text = f.read()
         # Create prompt and get response
-        prompt = create_prompt(extracted_text)
         response = model.generate_content(prompt)
         # Parse the JSON response
@@ -178,6 +178,7 @@ def extract_data_with_gemini(text_file_path: str) -> dict:
 # Main Processing Function
 def process_pdf(pdf_file):
     temp_dir = os.path.join(os.getcwd(), "temp_processing")
     output_dir = os.path.join(temp_dir, 'output_images')
@@ -185,6 +186,9 @@ def process_pdf(pdf_file):
         shutil.rmtree(temp_dir)
     os.makedirs(output_dir, exist_ok=True)
     try:
         # Convert PDF to images and process
         images = convert_from_path(pdf_file.name)
@@ -206,7 +210,7 @@ def process_pdf(pdf_file):
         text_file_path = os.path.join(output_dir, 'extracted_text.txt')
         # Process with Gemini
-        extracted_data = extract_data_with_gemini(text_file_path)
         # Save extracted data to JSON file
         json_path = os.path.join(temp_dir, "extracted_data.json")

     return prompt
+def extract_data_with_gemini(text_file_path: str, path_to_data_to_extract: str) -> dict:
     try:
         # Initialize Gemini
         model = initialize_gemini()
             extracted_text = f.read()
         # Create prompt and get response
+        prompt = create_prompt(extracted_text, path_to_data_to_extract)
         response = model.generate_content(prompt)
         # Parse the JSON response
 # Main Processing Function
 def process_pdf(pdf_file):
+    template_dir = os.path.join(os.getcwd(), "templates")
     temp_dir = os.path.join(os.getcwd(), "temp_processing")
     output_dir = os.path.join(temp_dir, 'output_images')
         shutil.rmtree(temp_dir)
     os.makedirs(output_dir, exist_ok=True)
+    ## JSON of the data to extract with descriptions
+    path_to_data_to_extract = os.path.join(template_dir, "data_to_extract.json")
     try:
         # Convert PDF to images and process
         images = convert_from_path(pdf_file.name)
         text_file_path = os.path.join(output_dir, 'extracted_text.txt')
         # Process with Gemini
+        extracted_data = extract_data_with_gemini(text_file_path, path_to_data_to_extract)
         # Save extracted data to JSON file
         json_path = os.path.join(temp_dir, "extracted_data.json")