# RAG-JAN-2025 / app.py
# muhammadshaheryar's picture
# Create app.py
# 66d3cf4 verified
import streamlit as st
import pandas as pd
from io import StringIO
import PyPDF2
from docx import Document
# Function to extract data from a CSV file
def read_csv(file):
    """Parse a CSV source into a pandas DataFrame.

    Args:
        file: Anything ``pandas.read_csv`` accepts (path, URL, or a
            file-like object such as a Streamlit upload).

    Returns:
        The parsed ``pandas.DataFrame``.
    """
    return pd.read_csv(file)
# Function to extract data from an Excel file
def read_excel(file):
    """Parse an Excel workbook into a pandas DataFrame.

    Args:
        file: Anything ``pandas.read_excel`` accepts (path or a binary
            file-like object such as a Streamlit upload).

    Returns:
        The ``pandas.DataFrame`` that ``pandas.read_excel`` produces with
        its default arguments.
    """
    return pd.read_excel(file)
# Function to extract text from a TXT file
def read_txt(file):
    """Read a binary file-like object and return its contents as text.

    The bytes are decoded as UTF-8. A leading byte-order mark is dropped so
    it does not show up as a stray U+FEFF character, and bytes that are not
    valid UTF-8 are replaced with U+FFFD instead of raising, so a single bad
    byte does not abort the whole upload.

    Args:
        file: Object whose ``read()`` returns ``bytes``.

    Returns:
        The decoded text (``""`` for an empty file).
    """
    raw = file.read()
    # "utf-8-sig" behaves like "utf-8" but strips an optional leading BOM.
    return raw.decode("utf-8-sig", errors="replace")
# Function to extract text from a DOCX file
def read_docx(file):
    """Extract the paragraph text of a DOCX document.

    Args:
        file: Path or binary file-like object passed straight to
            ``docx.Document``.

    Returns:
        The text of every entry in ``doc.paragraphs``, joined with
        newlines. Only ``doc.paragraphs`` is read.
    """
    doc = Document(file)
    return "\n".join(para.text for para in doc.paragraphs)
# Function to extract text from a PDF file
def read_pdf(file):
    """Extract the text of every page of a PDF.

    Uses the ``PdfReader`` / ``.pages`` API. The older ``PdfFileReader``,
    ``numPages`` and ``getPage`` names were removed in PyPDF2 3.0 and raise
    there, so they must not be used.

    Args:
        file: Path or binary file-like object passed to ``PyPDF2.PdfReader``.

    Returns:
        The extracted text of all pages concatenated in page order, with no
        separator between pages.
    """
    reader = PyPDF2.PdfReader(file)
    # `or ""` guards against a page for which extraction yields nothing
    # (None), which would otherwise break the join.
    return "".join(page.extract_text() or "" for page in reader.pages)
# Streamlit app
def main():
    """Render the Streamlit page: upload a file and display its contents.

    The reader is selected from the uploaded file's extension, falling back
    to the browser-reported MIME type. The extension is checked first
    because the MIME type is supplied by the browser and is not consistent
    across platforms (a ``.csv`` may be reported as
    ``application/vnd.ms-excel``), which would otherwise send a file the
    uploader accepted into the "unsupported" branch.
    """
    st.title("File Upload and Data Extraction App")
    st.write("Upload a file (CSV, Excel, TXT, DOCX, or PDF) to extract data.")

    # File uploader
    uploaded_file = st.file_uploader(
        "Choose a file", type=["csv", "xlsx", "txt", "docx", "pdf"]
    )
    if uploaded_file is None:
        return

    # extension -> (heading shown above the content, reader function)
    handlers = {
        "csv": ("### CSV Data", read_csv),
        "xlsx": ("### Excel Data", read_excel),
        "txt": ("### TXT Data", read_txt),
        "docx": ("### DOCX Data", read_docx),
        "pdf": ("### PDF Data", read_pdf),
    }
    # Fallback used only when the file name carries no known extension.
    mime_to_extension = {
        "text/csv": "csv",
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": "xlsx",
        "text/plain": "txt",
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document": "docx",
        "application/pdf": "pdf",
    }

    # A name without a dot yields the whole name here, which simply misses
    # the table and falls through to the MIME lookup.
    extension = uploaded_file.name.rsplit(".", 1)[-1].lower()
    handler = handlers.get(extension)
    if handler is None:
        handler = handlers.get(mime_to_extension.get(uploaded_file.type))

    if handler is None:
        st.error("Unsupported file type. Please upload a CSV, Excel, TXT, DOCX, or PDF file.")
        return

    heading, reader = handler
    # Parse before writing the heading so a failing read does not leave an
    # orphan heading on the page.
    content = reader(uploaded_file)
    st.write(heading)
    st.write(content)
# Run the Streamlit app
# Only start the app when executed as a script, not when imported.
if __name__ == "__main__":
    main()