Spaces:
Sleeping
Sleeping
import streamlit as st | |
import os | |
import tempfile | |
import PyPDF2 | |
def _save_uploads(uploaded_files, temp_dir):
    """Write each uploaded file into *temp_dir*.

    Args:
        uploaded_files: Non-empty list of objects returned by
            ``st.file_uploader`` (each exposes ``name`` and ``getvalue()``).
        temp_dir: Path of an existing directory to write into.

    Returns:
        A list of ``(display_name, pdf_path)`` tuples, one per upload, in
        upload order. ``display_name`` is the name as uploaded; ``pdf_path``
        is where the bytes were written.
    """
    saved = []
    for index, uploaded_file in enumerate(uploaded_files):
        # The name is supplied by the client; keep only its final path
        # component so the write cannot land outside temp_dir.
        safe_name = os.path.basename(uploaded_file.name)
        # The index prefix keeps same-named uploads from overwriting each
        # other and guarantees a non-empty, non-special file name.
        pdf_path = os.path.join(temp_dir, f"{index}_{safe_name}")
        with open(pdf_path, "wb") as f:
            # getvalue() returns the whole buffer regardless of the current
            # stream position, unlike read().
            f.write(uploaded_file.getvalue())
        saved.append((uploaded_file.name, pdf_path))
    return saved


def _show_metadata(display_name, pdf_path):
    """Render the metadata of the PDF at *pdf_path* in the Streamlit page.

    Any failure while opening, parsing, or rendering this file is reported
    with ``st.error`` and does not propagate, so one bad PDF does not stop
    the remaining files from being processed.

    Args:
        display_name: Name shown to the user for this file.
        pdf_path: Filesystem path of the saved PDF.
    """
    try:
        with open(pdf_path, "rb") as f:
            pdf_info = PyPDF2.PdfReader(f).metadata
            st.write(f"**File:** {display_name}")
            if pdf_info is None:
                # No document-information dictionary in this PDF; say so
                # instead of failing on ``None.items()``.
                st.info("No metadata found.")
            else:
                for key, value in pdf_info.items():
                    st.write(f"{key}: {value}")
            st.write("---")
    except Exception as e:
        # Deliberately broad: malformed PDFs can fail in many ways and this
        # is a per-file best-effort boundary.
        st.error(f"Error processing {display_name}: {e}")


def main():
    """Run the PDF Metadata Extractor Streamlit page.

    Shows an "About" expander, lets the user upload one or more PDF files,
    lists the uploaded file names and, when the "Extract Metadata" button is
    pressed, displays each file's metadata key/value pairs.
    """
    st.title("PDF Metadata Extractor")

    # NOTE(review): the original indentation was lost; both notes are assumed
    # to belong inside the expander - confirm against the deployed app.
    with st.expander("ℹ️ About"):
        st.markdown(
            "This app extracts metadata from PDF files.\n"
            "You can upload one or more PDF files and view their metadata."
        )
        # A Markdown inline link needs "(" immediately after "]"; a newline
        # between them leaves the link unrendered.
        st.markdown(
            "(c) 2024 [Louie F. Cervantes, M.Eng. (Information Engineering)]"
            "(https://huggingface.co/spaces/louiecerv)"
        )

    # Multi-file uploader restricted to the "pdf" extension.
    uploaded_files = st.file_uploader(
        "Upload PDF files:", type="pdf", accept_multiple_files=True
    )
    if not uploaded_files:
        st.warning("Please upload PDF files.")
        return

    try:
        # The directory and everything saved in it are removed when the
        # ``with`` block exits.
        with tempfile.TemporaryDirectory() as temp_dir:
            saved_pdfs = _save_uploads(uploaded_files, temp_dir)

            st.subheader("Detected PDF Files:")
            for display_name, _ in saved_pdfs:
                st.write(display_name)

            if st.button("Extract Metadata"):
                st.subheader("PDF Metadata:")
                for display_name, pdf_path in saved_pdfs:
                    _show_metadata(display_name, pdf_path)
    except Exception as e:
        # Top-level boundary: surface unexpected failures (for example an
        # error while writing the temporary files) in the page itself.
        st.error(f"An error occurred: {e}")
# Run the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()