"""Streamlit app: classify free text, CSV rows, or PDF contents with a BERT pipeline."""

import fitz  # PyMuPDF
import pandas as pd
import streamlit as st
from transformers import pipeline

# Hugging Face checkpoint handed to the text-classification pipeline.
# NOTE(review): this looks like the base pretrained checkpoint rather than one
# fine-tuned for sentiment -- confirm the classification head is actually trained.
model_name = "google-bert/bert-base-uncased"

# Predictions scoring below this confidence are reported as "Negative".
CONFIDENCE_THRESHOLD = 0.75


@st.cache_resource
def _load_pipeline(name):
    """Build the text-classification pipeline for checkpoint *name*.

    Streamlit re-executes the script on every interaction; caching the
    pipeline as a resource avoids rebuilding the model on each rerun.
    """
    return pipeline("text-classification", model=name)


pipe = _load_pipeline(model_name)

# Custom labels for your classification task
labels = {
    "LABEL_0": "Negative",
    "LABEL_1": "Positive",
}


def classify_text(text):
    """Classify *text* and return a ``(label, score)`` tuple.

    The raw pipeline label is mapped through ``labels`` (unknown labels are
    passed through unchanged). Any prediction whose score is below
    ``CONFIDENCE_THRESHOLD`` is reported as "Negative", whatever the raw label.
    NOTE(review): that rule also relabels low-confidence "Positive" results as
    "Negative" -- confirm this is the intended policy.

    Args:
        text: The string to classify.

    Returns:
        ``(label, score)`` where ``label`` is the display label and ``score``
        is the score reported by the pipeline for its predicted class.
    """
    # truncation=True keeps long inputs (e.g. the full text of a PDF) within
    # the model's maximum sequence length instead of failing in the model.
    result = pipe(text, truncation=True)[0]
    label = labels.get(result["label"], result["label"])
    score = result["score"]

    # Adjust classification based on score
    if score < CONFIDENCE_THRESHOLD:
        label = "Negative"

    return label, score


# Streamlit app
st.title("BERT Text Classification")
st.write("This app uses a pre-trained BERT model to classify text into positive or negative sentiment.")

# Input text area
input_text = st.text_area("Enter text to classify")

if st.button("Classify"):
    # Treat whitespace-only input the same as empty input.
    if input_text and input_text.strip():
        label, score = classify_text(input_text)
        st.write(f"**Predicted Class:** {label}")
        st.write(f"**Confidence:** {score:.4f}")
    else:
        st.write("Please enter some text to classify.")

# File upload section
st.write("Upload a file for classification:")
uploaded_file = st.file_uploader("Choose a file", type=["csv", "pdf"])

if uploaded_file is not None:
    # Dispatch on the file extension: the browser-reported MIME type for CSV
    # files is not reliably "text/csv", which would skip processing silently.
    file_name = uploaded_file.name.lower()
    try:
        if file_name.endswith(".csv"):
            # Process CSV file
            df = pd.read_csv(uploaded_file, encoding="utf-8")
            if "text" not in df.columns:
                st.write("The CSV file must contain a 'text' column.")
            else:
                # Missing or non-string cells are coerced to strings so the
                # pipeline always receives text.
                texts = df["text"].fillna("").astype(str)
                # Run the model once per row and reuse the result for both
                # output columns.
                results = [classify_text(row_text) for row_text in texts]
                df["Prediction"] = [row_label for row_label, _ in results]
                df["Confidence"] = [row_score for _, row_score in results]
                st.write(df)
        elif file_name.endswith(".pdf"):
            # Process PDF file
            with fitz.open(stream=uploaded_file.read(), filetype="pdf") as doc:
                text = "".join(page.get_text() for page in doc)

            if not text.strip():
                st.write("No extractable text was found in the PDF.")
            else:
                label, score = classify_text(text)
                st.write(f"**Predicted Class for PDF:** {label}")
                st.write(f"**Confidence:** {score:.4f}")
        else:
            st.write("Unsupported file type. Please upload a CSV or PDF file.")
    except Exception as e:
        # Top-level UI boundary: surface any processing failure to the user.
        st.error(f"Error: {e}")