Spaces:
Sleeping
Sleeping
import streamlit as st | |
import pandas as pd | |
from io import StringIO | |
import PyPDF2 | |
from docx import Document | |
def read_csv(file):
    """Parse CSV content into a pandas DataFrame.

    Args:
        file: A path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        The DataFrame produced by ``pandas.read_csv`` with default options.
    """
    return pd.read_csv(file)
def read_excel(file):
    """Parse an Excel workbook into a pandas DataFrame.

    Args:
        file: A path or binary file-like object accepted by
            ``pandas.read_excel``.

    Returns:
        The DataFrame produced by ``pandas.read_excel`` with default options.
    """
    return pd.read_excel(file)
def read_txt(file):
    """Return the contents of an uploaded text file as a string.

    The bytes are decoded as UTF-8. A leading byte-order mark (written by
    some editors) is dropped instead of leaking into the text as U+FEFF,
    and bytes that are not valid UTF-8 are shown as U+FFFD rather than
    aborting the whole upload with ``UnicodeDecodeError``.

    Args:
        file: A binary file-like object whose ``read()`` returns ``bytes``.

    Returns:
        The decoded text.
    """
    raw = file.read()
    # "utf-8-sig" behaves exactly like "utf-8" when no BOM is present.
    return raw.decode("utf-8-sig", errors="replace")
def read_docx(file):
    """Return the paragraph text of a DOCX document, one paragraph per line.

    Args:
        file: A path or binary file-like object accepted by
            ``docx.Document``.

    Returns:
        The ``text`` of each entry in the document's ``paragraphs``,
        joined with newline characters.
    """
    paragraphs = Document(file).paragraphs
    return "\n".join(paragraph.text for paragraph in paragraphs)
def read_pdf(file):
    """Return the text extracted from every page of a PDF, concatenated.

    Uses ``PyPDF2.PdfReader`` and its ``pages`` sequence. The older
    ``PdfFileReader`` / ``numPages`` / ``getPage`` names were removed in
    PyPDF2 3.0 and raise an error there.

    Args:
        file: A path or binary file-like object accepted by
            ``PyPDF2.PdfReader``.

    Returns:
        The text of all pages joined in page order with no separator.
        Pages that yield no text contribute an empty string.
    """
    reader = PyPDF2.PdfReader(file)
    # Guard against a falsy result (e.g. image-only pages) so one page
    # without extractable text cannot break the concatenation.
    return "".join(page.extract_text() or "" for page in reader.pages)
def main():
    """Render the upload page and display the extracted content.

    The reader is selected from the uploaded file's name extension, with
    the browser-reported MIME type used only as a fallback. Browsers do not
    report MIME types consistently (CSV files are often sent as
    ``application/vnd.ms-excel``), so matching on MIME alone rejects valid
    uploads as unsupported.

    Failures while parsing the file are reported with ``st.error`` instead
    of propagating out of the page.
    """
    st.title("File Upload and Data Extraction App")
    st.write("Upload a file (CSV, Excel, TXT, DOCX, or PDF) to extract data.")

    uploaded_file = st.file_uploader(
        "Choose a file", type=["csv", "xlsx", "txt", "docx", "pdf"]
    )
    if uploaded_file is None:
        return

    # Extension -> (reader function, heading shown above the content).
    handlers = {
        "csv": (read_csv, "### CSV Data"),
        "xlsx": (read_excel, "### Excel Data"),
        "txt": (read_txt, "### TXT Data"),
        "docx": (read_docx, "### DOCX Data"),
        "pdf": (read_pdf, "### PDF Data"),
    }
    # Fallback for uploads whose name carries no recognised extension.
    mime_to_extension = {
        "text/csv": "csv",
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": "xlsx",
        "text/plain": "txt",
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document": "docx",
        "application/pdf": "pdf",
    }

    file_name = getattr(uploaded_file, "name", "") or ""
    extension = file_name.rsplit(".", 1)[-1].lower() if "." in file_name else ""
    if extension not in handlers:
        extension = mime_to_extension.get(uploaded_file.type, "")

    handler = handlers.get(extension)
    if handler is None:
        st.error("Unsupported file type. Please upload a CSV, Excel, TXT, DOCX, or PDF file.")
        return

    reader, heading = handler
    try:
        content = reader(uploaded_file)
    except Exception as exc:  # UI boundary: show the problem to the user.
        st.error(f"Could not read the uploaded file: {exc}")
        return

    st.write(heading)
    st.write(content)
# Entry point: build the page only when this file is executed as a script.
if __name__ == "__main__":
    main()