louiecerv's picture
Save
5c88547
raw
history blame
2.29 kB
import streamlit as st
import os
import tempfile
import PyPDF2
def _store_uploads(uploaded_files, temp_dir):
    """Write each upload into *temp_dir* and return ``(display_name, path)`` pairs.

    Args:
        uploaded_files: Objects returned by ``st.file_uploader``; each must
            expose ``name`` and ``getvalue()``.
        temp_dir: Existing directory that receives the copies.

    Returns:
        A list of ``(display_name, path_on_disk)`` tuples, in upload order.

    Raises:
        OSError: If a file cannot be written.
    """
    stored = []
    for index, uploaded_file in enumerate(uploaded_files):
        display_name = uploaded_file.name
        # The name is supplied by the client: keep only its final component
        # so something like "../../x.pdf" cannot escape temp_dir.
        safe_name = os.path.basename(display_name.replace("\\", "/")) or "upload.pdf"
        # The index prefix keeps two uploads that share a name from
        # overwriting each other on disk.
        pdf_path = os.path.join(temp_dir, f"{index}_{safe_name}")
        with open(pdf_path, "wb") as f:
            # getvalue() returns the whole buffer regardless of the current
            # read position, unlike read().
            f.write(uploaded_file.getvalue())
        stored.append((display_name, pdf_path))
    return stored


def _show_metadata(display_name, pdf_path):
    """Render the document-information entries of one PDF in the Streamlit page.

    Args:
        display_name: Name shown to the user for this file.
        pdf_path: Location of the PDF on disk.
    """
    with open(pdf_path, "rb") as f:
        pdf_info = PyPDF2.PdfReader(f).metadata
        st.write(f"**File:** {display_name}")
        if not pdf_info:
            # A PDF without an info dictionary yields no metadata; say so
            # instead of failing on a missing ``.items()``.
            st.info("No metadata found in this file.")
        else:
            for key, value in pdf_info.items():
                st.write(f"{key}: {value}")
    st.write("---")


def main() -> None:
    """Run the Streamlit page: upload PDFs, list them, and show their metadata."""
    st.title("PDF Metadata Extractor")
    with st.expander("ℹ️ About"):
        st.markdown(
            "This app extracts metadata from PDF files.\n"
            "You can upload one or more PDF files and view their metadata."
        )
        # Link text and URL must be adjacent for Markdown to render a link.
        st.markdown(
            "(c) 2024 [Louie F. Cervantes, M.Eng. (Information Engineering)]"
            "(https://huggingface.co/spaces/louiecerv)"
        )

    # Multi-file uploader restricted to PDFs.
    uploaded_files = st.file_uploader(
        "Upload PDF files:", type="pdf", accept_multiple_files=True
    )
    if not uploaded_files:
        st.warning("Please upload PDF files.")
        return

    try:
        # Copies live only for the duration of this script run.
        with tempfile.TemporaryDirectory() as temp_dir:
            stored = _store_uploads(uploaded_files, temp_dir)

            st.subheader("Detected PDF Files:")
            for display_name, _ in stored:
                st.write(display_name)

            if st.button("Extract Metadata"):
                st.subheader("PDF Metadata:")
                for display_name, pdf_path in stored:
                    try:
                        _show_metadata(display_name, pdf_path)
                    except Exception as e:
                        # Best effort per file: one unreadable PDF must not
                        # stop the remaining files from being processed.
                        st.error(f"Error processing {display_name}: {e}")
    except Exception as e:
        # Page-level boundary: report the failure in the UI rather than
        # letting a raw traceback reach the user.
        st.error(f"An error occurred: {e}")
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()