anmolsahai committed on
Commit
bf13bd6
1 Parent(s): 3056140
Files changed (1) hide show
  1. app.py +8 -4
app.py CHANGED
@@ -1,6 +1,6 @@
1
  import os
2
  import base64
3
- from pdfminer.high_level import extract_text
4
  from langchain_core.prompts import PromptTemplate
5
  from google.cloud import aiplatform
6
  from google.cloud.aiplatform_v1 import ModelServiceClient
@@ -20,7 +20,7 @@ documents = [
20
  Document(content_type="application/pdf", data=base64.b64decode("JVBERi0xLj+CnN0YXJ0eHJlZgoyMTk5MDYKJSVFT0YK")),
21
  Document(content_type="application/pdf", data=base64.b64decode("JVBERi0xLjQKJiUlRU9GCg==")),
22
  Document(content_type="application/pdf", data=base64.b64decode("JVBERi0xLjQKJe90IDMwOCAwIFIvU2l6ZSAzMTg+PgpzdGFydHhyZWYKMjcwNzU3CiUlRU9GCg==")),
23
- Document(content_type="application/pdf", data=base64.b64decode("JVBERi0xLjUNJeLjz9MNCjcgMCBvYmoNPDwvTGluZWFyaXplZCAxL0wgNjc1NzgvTyA5L0UgNjAyNDYvTiAxL1QgNjcyODcvSCBbIDQ4MyAxNTRdPj4NZW5kb2JxDSAgICAgICAgICAgICAgICAgICAgDQoyMiAwIG9iag08PC9EZWNvZGVQYXJtczw8L0NvbHVtbnMgNC9QcmVkaWN0b3IgMTI+Pi9GaWx0ZXIvRmxhdGVEZWNvZGUvSURbPDE3NzU4MkJFODc4MzRFQjNBOEM3RkIzQTgyRjFFMEFCPjw5MzI2Qjk4REM4NjQ2RTRCODI3MzZFQUEzOENEQjFBQj5dL0luZGV4WzcgMjhdL0luZm8gNiAwIFIvTGVuZ3RoIDgzL1ByZXYgNjcyODgvUm9vdCA4IDAgUi9TaXplIDM1L1R5cGUvWFPRg0K"))
24
  ]
25
 
26
  text1 = """
@@ -109,8 +109,12 @@ if uploaded_file is not None:
109
  file_details = {"FileName": uploaded_file.name, "FileType": uploaded_file.type}
110
  st.write(file_details)
111
 
112
- # Display the content of the uploaded file
113
- content = extract_text(uploaded_file)
 
 
 
 
114
  st.text(content)
115
 
116
  # Placeholder for user inputs
 
1
  import os
2
  import base64
3
+ import fitz # PyMuPDF
4
  from langchain_core.prompts import PromptTemplate
5
  from google.cloud import aiplatform
6
  from google.cloud.aiplatform_v1 import ModelServiceClient
 
20
  Document(content_type="application/pdf", data=base64.b64decode("JVBERi0xLj+CnN0YXJ0eHJlZgoyMTk5MDYKJSVFT0YK")),
21
  Document(content_type="application/pdf", data=base64.b64decode("JVBERi0xLjQKJiUlRU9GCg==")),
22
  Document(content_type="application/pdf", data=base64.b64decode("JVBERi0xLjQKJe90IDMwOCAwIFIvU2l6ZSAzMTg+PgpzdGFydHhyZWYKMjcwNzU3CiUlRU9GCg==")),
23
+ Document(content_type="application/pdf", data=base64.b64decode("JVBERi0xLjUNJeLjz9MNCjcgMCBvYmoNPDwvTGluZWFyaXplZCAxL0wgNjc1NzgvTyA5L0UgNjAyNDYvTiAxL1QgNjcyODcvSCBbIDQ4MyAxNTRdPj4NZW5kb2JqDSAgICAgICAgICAgICAgICAgICAgDQoyMiAwIG9iag08PC9EZWNvZGVQYXJtczw8L0NvbHVtbnMgNC9QcmVkaWN0b3IgMTI+Pi9GaWx0ZXIvRmxhdGVEZWNvZGUvSURbPDE3NzU4MkJFODc4MzRFQjNBOEM3RkIzQTgyRjFFMEFCPjw5MzI2Qjk4REM4NjQ2RTRCODI3MzZFQUEzOENEQjFBQj5dL0luZGV4WzcgMjhdL0luZm8gNiAwIFIvTGVuZ3RoIDgzL1ByZXYgNjcyODgvUm9vdCA4IDAgUi9TaXplIDM1L1R5cGUvWFPRg0K"))
24
  ]
25
 
26
  text1 = """
 
109
  file_details = {"FileName": uploaded_file.name, "FileType": uploaded_file.type}
110
  st.write(file_details)
111
 
112
+ # Extract text from the uploaded PDF file
113
+ pdf_document = fitz.open(stream=uploaded_file.read(), filetype="pdf")
114
+ content = ""
115
+ for page_num in range(len(pdf_document)):
116
+ page = pdf_document.load_page(page_num)
117
+ content += page.get_text()
118
  st.text(content)
119
 
120
  # Placeholder for user inputs