Spaces:
Running
Running
Carlos Salgado
committed on
Commit
•
2e3cdd3
1
Parent(s):
9813b6b
rework io bug
Browse files
- app.py +9 -3
- scripts.py +8 -5
app.py
CHANGED
@@ -12,11 +12,17 @@ uploaded_file = st.file_uploader("Choose a PDF file", type=["pdf","txt"])
|
|
12 |
|
13 |
if uploaded_file is not None:
|
14 |
try:
|
15 |
-
|
16 |
-
|
17 |
-
|
|
|
|
|
18 |
metadata = generate_metadata(docs)
|
19 |
st.write('## Converted Text')
|
20 |
st.write(metadata)
|
|
|
|
|
|
|
|
|
21 |
except Exception as e:
|
22 |
st.error(f'Error: {e}')
|
|
|
12 |
|
13 |
if uploaded_file is not None:
|
14 |
try:
|
15 |
+
with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(uploaded_file.name)[1]) as tmp:
|
16 |
+
tmp.write(uploaded_file.read())
|
17 |
+
file_path = tmp.name
|
18 |
+
|
19 |
+
docs = ingest(file_path)
|
20 |
metadata = generate_metadata(docs)
|
21 |
st.write('## Converted Text')
|
22 |
st.write(metadata)
|
23 |
+
|
24 |
+
# Clean up the temporary file
|
25 |
+
os.remove(file_path)
|
26 |
+
|
27 |
except Exception as e:
|
28 |
st.error(f'Error: {e}')
|
scripts.py
CHANGED
@@ -15,11 +15,13 @@ load_dotenv()
|
|
15 |
|
16 |
import io
|
17 |
|
18 |
-
def ingest(
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
loader =
|
|
|
|
|
23 |
else:
|
24 |
raise NotImplementedError('Only .txt or .pdf files are supported')
|
25 |
|
@@ -43,6 +45,7 @@ def ingest(file_obj, file_ext='pdf'):
|
|
43 |
return docs
|
44 |
|
45 |
|
|
|
46 |
def generate_metadata(docs):
|
47 |
prompt_template = """
|
48 |
BimDiscipline = ['plumbing', 'network', 'heating', 'electrical', 'ventilation', 'architecture']
|
|
|
15 |
|
16 |
import io
|
17 |
|
18 |
+
def ingest(file_path):
|
19 |
+
extension = os.path.splitext(file_path)[1].lower()
|
20 |
+
|
21 |
+
if extension == '.pdf':
|
22 |
+
loader = UnstructuredPDFLoader(file_path)
|
23 |
+
elif extension == '.txt':
|
24 |
+
loader = TextLoader(file_path)
|
25 |
else:
|
26 |
raise NotImplementedError('Only .txt or .pdf files are supported')
|
27 |
|
|
|
45 |
return docs
|
46 |
|
47 |
|
48 |
+
|
49 |
def generate_metadata(docs):
|
50 |
prompt_template = """
|
51 |
BimDiscipline = ['plumbing', 'network', 'heating', 'electrical', 'ventilation', 'architecture']
|