"""Streamlit app that extracts and displays the contents of an uploaded file.

Supported formats: CSV, Excel (.xlsx), plain text, DOCX and PDF.
"""

import os
from io import StringIO

import pandas as pd
import PyPDF2
import streamlit as st
from docx import Document


def read_csv(file):
    """Parse a CSV file-like object and return it as a pandas DataFrame."""
    return pd.read_csv(file)


def read_excel(file):
    """Parse an Excel file-like object and return it as a pandas DataFrame."""
    return pd.read_excel(file)


def read_txt(file):
    """Return the text of a binary file-like object, decoded as UTF-8.

    A leading byte-order mark is dropped and undecodable bytes are replaced
    with U+FFFD, so a single stray byte does not abort the whole upload.
    """
    return file.read().decode("utf-8-sig", errors="replace")


def read_docx(file):
    """Return the paragraph text of a DOCX file, one paragraph per line."""
    document = Document(file)
    return "\n".join(paragraph.text for paragraph in document.paragraphs)


def read_pdf(file):
    """Return the concatenated text of every page of a PDF file.

    Pages with no extractable text contribute an empty string.
    """
    # PdfFileReader / numPages / getPage were removed in PyPDF2 3.0; prefer
    # the current PdfReader API and only fall back to the legacy class name
    # when the installed release predates it.
    reader_cls = getattr(PyPDF2, "PdfReader", None) or PyPDF2.PdfFileReader
    reader = reader_cls(file)
    # Guard against extract_text() yielding None so the join cannot fail.
    return "".join(page.extract_text() or "" for page in reader.pages)


# Extension -> (heading shown above the result, reader function).
_HANDLERS = {
    ".csv": ("### CSV Data", read_csv),
    ".xlsx": ("### Excel Data", read_excel),
    ".txt": ("### TXT Data", read_txt),
    ".docx": ("### DOCX Data", read_docx),
    ".pdf": ("### PDF Data", read_pdf),
}

# MIME types reported by the browser for each supported extension.
_MIME_TO_EXTENSION = {
    "text/csv": ".csv",
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx",
    "text/plain": ".txt",
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx",
    "application/pdf": ".pdf",
}


def _resolve_handler(uploaded_file):
    """Return the ``(heading, reader)`` pair for an upload, or ``None``.

    A recognised MIME type wins. Otherwise the file extension is used,
    because the MIME type comes from the browser and is not reliable (CSV
    files are commonly reported as ``application/vnd.ms-excel``).
    """
    extension = _MIME_TO_EXTENSION.get(uploaded_file.type)
    if extension is None:
        filename = getattr(uploaded_file, "name", "") or ""
        extension = os.path.splitext(filename)[1].lower()
    return _HANDLERS.get(extension)


def main():
    """Render the upload widget and display the extracted file contents."""
    st.title("File Upload and Data Extraction App")
    st.write("Upload a file (CSV, Excel, TXT, DOCX, or PDF) to extract data.")

    uploaded_file = st.file_uploader(
        "Choose a file", type=["csv", "xlsx", "txt", "docx", "pdf"]
    )
    if uploaded_file is None:
        return

    handler = _resolve_handler(uploaded_file)
    if handler is None:
        st.error(
            "Unsupported file type. Please upload a CSV, Excel, TXT, DOCX, or PDF file."
        )
        return

    heading, reader = handler
    try:
        content = reader(uploaded_file)
    except Exception as exc:
        # UI boundary: a corrupt or mislabeled upload should produce a
        # readable message instead of a raw traceback.
        st.error(f"Could not read the uploaded file: {exc}")
        return

    st.write(heading)
    st.write(content)


# Run the Streamlit app
if __name__ == "__main__":
    main()