Spaces:

louiecerv
/

pdf-meta-extractor

Sleeping

App Files Files Community

pdf-meta-extractor / app.py

louiecerv

Save

5c88547 about 1 month ago

raw

history blame

2.29 kB

	import streamlit as st
	import os
	import tempfile
	import PyPDF2

	def _show_pdf_metadata(uploaded_file):
	    """Render the document-information entries of one uploaded PDF.

	    Args:
	        uploaded_file: A file-like object returned by ``st.file_uploader``;
	            it must expose ``name`` and ``seek`` and be readable by
	            ``PyPDF2.PdfReader``.

	    Raises:
	        Exception: Whatever ``PyPDF2`` raises while parsing the file; the
	            caller decides how to report it.
	    """
	    # Rewind defensively so parsing never depends on an earlier read of
	    # this buffer.
	    uploaded_file.seek(0)
	    pdf_info = PyPDF2.PdfReader(uploaded_file).metadata

	    st.write(f"File: {uploaded_file.name}")
	    if pdf_info:
	        for key, value in pdf_info.items():
	            st.write(f"{key}: {value}")
	    else:
	        # ``metadata`` is None (or empty) when the PDF carries no document
	        # information dictionary; say so instead of failing on ``.items()``.
	        st.info("No metadata found in this file.")
	    st.write("---")


	def main():
	    """Run the Streamlit page: upload PDFs, list them, show their metadata.

	    The uploaded files are parsed straight from memory, so nothing is
	    written to disk and files that share a name cannot overwrite each other.
	    A failure while parsing one file is reported and does not stop the
	    remaining files from being processed.

	    Returns:
	        None.
	    """
	    st.title("PDF Metadata Extractor")

	    with st.expander("ℹ️ About"):
	        st.markdown(
	            "This app extracts metadata from PDF files.\n"
	            "You can upload one or more PDF files and view their metadata."
	        )
	        # A Markdown inline link needs "](" with nothing in between, so the
	        # link text and the URL are joined without a line break.
	        st.markdown(
	            "(c) 2024 [Louie F. Cervantes, M.Eng. (Information Engineering)]"
	            "(https://huggingface.co/spaces/louiecerv)"
	        )

	    # Multi-file uploader (st.file_uploader with multiple file selection).
	    uploaded_files = st.file_uploader(
	        "Upload PDF files:", type="pdf", accept_multiple_files=True
	    )

	    if not uploaded_files:
	        st.warning("Please upload PDF files.")
	        return

	    st.subheader("Detected PDF Files:")
	    for uploaded_file in uploaded_files:
	        st.write(uploaded_file.name)

	    if not st.button("Extract Metadata"):
	        return

	    st.subheader("PDF Metadata:")
	    for uploaded_file in uploaded_files:
	        try:
	            _show_pdf_metadata(uploaded_file)
	        except Exception as e:
	            # UI boundary: parsing an arbitrary upload can fail in many
	            # ways; report the failure and continue with the next file.
	            st.error(f"Error processing {uploaded_file.name}: {e}")

	# Script entry point: build the page only when this file is executed
	# directly, not when it is imported as a module.
	if __name__ == "__main__":
	    main()