"""Streamlit app that assigns an ItemClass scope label to free-text queries.

The app offers two modes: classifying a single text query, or classifying
every row of a chosen column in an uploaded CSV/Excel table.
"""

import pandas as pd
import streamlit as st
import tiktoken  # noqa: F401  (unused in this file; kept from the original import list)
import torch
from transformers import GPT2Model, GPT2Tokenizer

# Class index -> human-readable ItemClass scope label.
LABEL_MAPPING = {
    0: "Pressure Safety Device",
    1: "Piping",
    2: "Pressure Vessel (VIE)",
    3: "FU Items",
    4: "Non Structural Tank",
    5: "Structure",
    6: "Corrosion Monitoring",
    7: "Flame Arrestor",
    8: "Pressure Vessel (VII)",
    9: "Lifting",
}


@st.cache_resource
def _load_model():
    """Load the GPT-2 model and tokenizer once and reuse them across reruns.

    Streamlit re-executes the whole script on every widget interaction, so
    loading at plain module level would reload the weights each time.

    Returns:
        A ``(model, tokenizer, device)`` tuple with the model already moved
        to ``device``.
    """
    target_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    loaded_tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
    loaded_model = GPT2Model.from_pretrained("gpt2")
    loaded_model.to(target_device)
    return loaded_model, loaded_tokenizer, target_device


# Load the model and tokenizer (module-level names kept for existing callers).
model, tokenizer, device = _load_model()


def classify_review(text, model, tokenizer, device, max_length=128, pad_token_id=50256):
    """Return the ItemClass scope label predicted for ``text``.

    The text is tokenized, truncated to ``max_length`` tokens, right-padded
    with ``pad_token_id`` up to ``max_length``, and run through ``model``.
    The argmax of the output at the last sequence position is looked up in
    ``LABEL_MAPPING``.

    Args:
        text: Text to classify. Missing values (``None``/NaN) and blank
            strings yield ``"Unknown"``; other non-string values are
            converted with ``str()``.
        model: Model whose output exposes ``last_hidden_state``.
        tokenizer: Tokenizer providing ``encode(text, return_tensors='pt')``.
        device: Torch device the input tensor is moved to.
        max_length: Fixed token length the input is truncated/padded to.
        pad_token_id: Token id used for right-padding.

    Returns:
        The label string, or ``"Unknown"`` when the predicted index is not
        in ``LABEL_MAPPING`` or there is no text to classify.
    """
    # Table cells may be NaN/None or numeric; the tokenizer expects a string.
    if not isinstance(text, str):
        if pd.isna(text):
            return "Unknown"
        text = str(text)
    # A blank string leaves nothing to classify.
    if not text.strip():
        return "Unknown"

    model.eval()

    input_ids = tokenizer.encode(text, return_tensors="pt").to(device)
    input_ids = input_ids[:, :max_length]
    pad_amount = max_length - input_ids.shape[1]
    input_ids = torch.nn.functional.pad(input_ids, (0, pad_amount), value=pad_token_id)

    with torch.no_grad():
        outputs = model(input_ids)

    # NOTE(review): GPT2Model is the bare transformer without a classification
    # head, so `last_hidden_state` holds hidden features rather than class
    # scores. The argmax below therefore selects a hidden-dimension index, not
    # one of the ten classes. A fine-tuned classification head/checkpoint is
    # presumably what was intended here -- confirm and swap it in.
    logits = outputs.last_hidden_state[:, -1, :]
    predicted_label = torch.argmax(logits, dim=-1).item()

    return LABEL_MAPPING.get(predicted_label, "Unknown")


def main():
    """Render the Streamlit UI and run classification for the chosen mode."""
    st.title("ItemClass Scope Classifier")

    input_option = st.radio("Select input option", ("Single Text Query", "Upload Table"))

    if input_option == "Single Text Query":
        text_query = st.text_input("Enter text query")
        if st.button("Classify"):
            if text_query:
                predicted_label = classify_review(text_query, model, tokenizer, device)
                st.write("Predicted Label:")
                st.write(predicted_label)
            else:
                st.warning("Please enter a text query.")

    elif input_option == "Upload Table":
        uploaded_file = st.file_uploader("Choose a file", type=["csv", "xlsx"])
        if uploaded_file is not None:
            # Compare case-insensitively so "DATA.CSV" is not sent to read_excel.
            if uploaded_file.name.lower().endswith(".csv"):
                df = pd.read_csv(uploaded_file)
            else:
                df = pd.read_excel(uploaded_file)

            text_column = st.selectbox("Select the text column", df.columns)
            predicted_labels = [
                classify_review(text, model, tokenizer, device)
                for text in df[text_column]
            ]
            df["Predicted Label"] = predicted_labels
            st.write(df)


if __name__ == "__main__":
    main()