Carlos Salgado committed on
Commit
2e3cdd3
1 Parent(s): 9813b6b

rework io bug

Browse files
Files changed (2) hide show
  1. app.py +9 -3
  2. scripts.py +8 -5
app.py CHANGED
@@ -12,11 +12,17 @@ uploaded_file = st.file_uploader("Choose a PDF file", type=["pdf","txt"])
12
 
13
  if uploaded_file is not None:
14
  try:
15
- file_ext = uploaded_file.name.split('.')[-1].lower()
16
- pdf_file = io.BytesIO(uploaded_file.read())
17
- docs = ingest(pdf_file, file_ext)
 
 
18
  metadata = generate_metadata(docs)
19
  st.write('## Converted Text')
20
  st.write(metadata)
 
 
 
 
21
  except Exception as e:
22
  st.error(f'Error: {e}')
 
12
 
13
  if uploaded_file is not None:
14
  try:
15
+ with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(uploaded_file.name)[1]) as tmp:
16
+ tmp.write(uploaded_file.read())
17
+ file_path = tmp.name
18
+
19
+ docs = ingest(file_path)
20
  metadata = generate_metadata(docs)
21
  st.write('## Converted Text')
22
  st.write(metadata)
23
+
24
+ # Clean up the temporary file
25
+ os.remove(file_path)
26
+
27
  except Exception as e:
28
  st.error(f'Error: {e}')
scripts.py CHANGED
@@ -15,11 +15,13 @@ load_dotenv()
15
 
16
  import io
17
 
18
- def ingest(file_obj, file_ext='pdf'):
19
- if file_ext == 'pdf':
20
- loader = UnstructuredPDFLoader(file_obj)
21
- elif file_ext == 'txt':
22
- loader = TextLoader(file_obj)
 
 
23
  else:
24
  raise NotImplementedError('Only .txt or .pdf files are supported')
25
 
@@ -43,6 +45,7 @@ def ingest(file_obj, file_ext='pdf'):
43
  return docs
44
 
45
 
 
46
  def generate_metadata(docs):
47
  prompt_template = """
48
  BimDiscipline = ['plumbing', 'network', 'heating', 'electrical', 'ventilation', 'architecture']
 
15
 
16
  import io
17
 
18
+ def ingest(file_path):
19
+ extension = os.path.splitext(file_path)[1].lower()
20
+
21
+ if extension == '.pdf':
22
+ loader = UnstructuredPDFLoader(file_path)
23
+ elif extension == '.txt':
24
+ loader = TextLoader(file_path)
25
  else:
26
  raise NotImplementedError('Only .txt or .pdf files are supported')
27
 
 
45
  return docs
46
 
47
 
48
+
49
  def generate_metadata(docs):
50
  prompt_template = """
51
  BimDiscipline = ['plumbing', 'network', 'heating', 'electrical', 'ventilation', 'architecture']