# Source: Hugging Face Space (status: Sleeping), commit 66d3cf4, file size 2,328 bytes.
import streamlit as st
import pandas as pd
from io import StringIO
import PyPDF2
from docx import Document
# Function to extract data from a CSV file
def read_csv(file) -> pd.DataFrame:
    """Parse an uploaded CSV file into a DataFrame.

    Args:
        file: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        The parsed table, using pandas' default parsing options.
    """
    return pd.read_csv(file)
# Function to extract data from an Excel file
def read_excel(file) -> pd.DataFrame:
    """Parse an uploaded Excel workbook into a DataFrame.

    Args:
        file: Path or binary file-like object accepted by
            ``pandas.read_excel``.

    Returns:
        The parsed table, using pandas' default options (no sheet is
        selected explicitly here).
    """
    return pd.read_excel(file)
# Function to extract text from a TXT file
def read_txt(file) -> str:
    """Return the decoded text content of an uploaded plain-text file.

    Args:
        file: Binary file-like object whose ``read()`` returns ``bytes``.

    Returns:
        The content decoded as UTF-8. A leading byte-order mark is dropped
        and undecodable bytes are replaced with U+FFFD instead of raising.
    """
    raw = file.read()
    # "utf-8-sig" strips the BOM that some editors prepend; errors="replace"
    # keeps a single bad byte from aborting the whole upload with a
    # UnicodeDecodeError.
    return raw.decode("utf-8-sig", errors="replace")
# Function to extract text from a DOCX file
def read_docx(file) -> str:
    """Return the paragraph text of an uploaded DOCX document.

    Args:
        file: Path or binary file-like object passed to ``Document``.

    Returns:
        The text of every entry in ``doc.paragraphs``, one per line. Only
        ``doc.paragraphs`` is read; nothing else in the document is
        consulted.
    """
    doc = Document(file)
    return "\n".join(para.text for para in doc.paragraphs)
# Function to extract text from a PDF file
def read_pdf(file) -> str:
    """Return the concatenated text of every page of an uploaded PDF.

    Uses ``PdfReader`` and its ``pages`` sequence rather than the legacy
    ``PdfFileReader`` / ``numPages`` / ``getPage`` names, which are
    deprecated in PyPDF2 and no longer available in its 3.x releases.

    Args:
        file: Path or binary file-like object passed to ``PyPDF2.PdfReader``.

    Returns:
        The page texts joined in page order with no separator.
    """
    reader = PyPDF2.PdfReader(file)
    # `or ""` is defensive: a page that produces no text contributes nothing
    # rather than breaking the join.
    return "".join(page.extract_text() or "" for page in reader.pages)
# Streamlit app
def main():
    """Render the upload page and display the content of the chosen file.

    The file kind is taken from the uploaded file's name extension first,
    because the browser-reported MIME type is not reliable (a ``.csv`` is
    commonly reported as ``application/vnd.ms-excel``). The MIME type is
    still used as a fallback when the extension is not recognised.
    Tabular files are shown as DataFrames; the other kinds as text.
    """
    st.title("File Upload and Data Extraction App")
    st.write("Upload a file (CSV, Excel, TXT, DOCX, or PDF) to extract data.")

    # File uploader
    uploaded_file = st.file_uploader("Choose a file", type=["csv", "xlsx", "txt", "docx", "pdf"])
    if uploaded_file is None:
        return

    # Extension -> (heading shown above the content, reader function).
    handlers = {
        "csv": ("### CSV Data", read_csv),
        "xlsx": ("### Excel Data", read_excel),
        "txt": ("### TXT Data", read_txt),
        "docx": ("### DOCX Data", read_docx),
        "pdf": ("### PDF Data", read_pdf),
    }
    # Fallback mapping for files whose name carries no usable extension.
    mime_to_extension = {
        "text/csv": "csv",
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": "xlsx",
        "text/plain": "txt",
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document": "docx",
        "application/pdf": "pdf",
    }

    # Determine file type and process accordingly
    _, dot, suffix = uploaded_file.name.rpartition(".")
    extension = suffix.lower() if dot else ""
    if extension not in handlers:
        extension = mime_to_extension.get(uploaded_file.type, "")

    handler = handlers.get(extension)
    if handler is None:
        st.error("Unsupported file type. Please upload a CSV, Excel, TXT, DOCX, or PDF file.")
        return

    heading, reader = handler
    # Parse before writing the heading so a failed read does not leave an
    # orphan heading on the page.
    content = reader(uploaded_file)
    st.write(heading)
    st.write(content)
# Run the Streamlit app
if __name__ == "__main__":
    main()