import streamlit as st
import pandas as pd
import torch
import tiktoken
from transformers import GPT2Tokenizer, GPT2Model

# Load the model and tokenizer once and reuse them across Streamlit reruns.
@st.cache_resource
def _load_model_and_tokenizer():
    """Load the GPT-2 tokenizer and model and move the model to the device.

    Streamlit re-executes this script from the top on each user interaction;
    caching the loaded objects avoids re-reading the checkpoint and re-moving
    the weights to the device every time.

    Returns:
        A ``(tokenizer, model, device)`` tuple. ``device`` is CUDA when
        ``torch.cuda.is_available()`` is true, otherwise CPU.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
    # NOTE(review): this loads the stock "gpt2" checkpoint through GPT2Model;
    # no fine-tuned classification weights are loaded here - confirm that this
    # is the intended model for the 10-class label mapping used downstream.
    model = GPT2Model.from_pretrained("gpt2")
    model.to(device)
    return tokenizer, model, device


tokenizer, model, device = _load_model_and_tokenizer()

def classify_review(text, model, tokenizer, device, max_length: int = 128, pad_token_id: int = 50256) -> str:
    """Classify ``text`` and return the name of the predicted item class.

    The text is tokenized, truncated to ``max_length`` tokens and right-padded
    with ``pad_token_id`` up to exactly ``max_length``. The model output at the
    final sequence position is arg-maxed over its last dimension and the
    resulting index is translated to a label name.

    Args:
        text: Text to classify. ``None``, float NaN (e.g. a blank table cell)
            and the empty string are treated as missing. Any other non-string
            value is converted with ``str()`` before tokenization.
        model: Callable taking the token-id tensor and returning an object
            with a ``last_hidden_state`` tensor; must also provide ``eval()``.
        tokenizer: Object whose ``encode(text, return_tensors='pt')`` returns
            a tensor of token ids with a leading batch axis.
        device: Device the token ids are moved to before inference.
        max_length: Fixed sequence length fed to the model.
        pad_token_id: Token id used for right padding.

    Returns:
        The label name for the predicted index, or ``"Unknown"`` when the
        input is missing/empty or the index is not in the label mapping.
    """
    label_mapping = {
        0: "Pressure Safety Device",
        1: "Piping",
        2: "Pressure Vessel (VIE)",
        3: "FU Items",
        4: "Non Structural Tank",
        5: "Structure",
        6: "Corrosion Monitoring",
        7: "Flame Arrestor",
        8: "Pressure Vessel (VII)",
        9: "Lifting"
    }
    unknown_label = "Unknown"

    # Missing values would otherwise make the tokenizer raise. NaN is the only
    # float that is not equal to itself, so no extra import is needed here.
    if text is None or (isinstance(text, float) and text != text):
        return unknown_label
    if not isinstance(text, str):
        # Numeric-looking table columns arrive as ints/floats, not strings.
        text = str(text)
    if not text:
        # An empty string yields zero tokens, which cannot be classified.
        return unknown_label

    model.eval()
    input_ids = tokenizer.encode(text, return_tensors='pt')
    input_ids = input_ids[:, :max_length]
    pad_amount = max_length - input_ids.shape[1]
    input_ids = torch.nn.functional.pad(input_ids, (0, pad_amount), value=pad_token_id)
    input_ids = input_ids.to(device)

    with torch.no_grad():
        outputs = model(input_ids)

    # NOTE(review): the argmax is taken over the last dimension of
    # `last_hidden_state` at the final (possibly padded) position. If `model`
    # has no classification head this dimension is presumably the hidden size
    # rather than the number of classes - confirm against the trained model.
    logits = outputs.last_hidden_state[:, -1, :]
    predicted_label = torch.argmax(logits, dim=-1).item()
    return label_mapping.get(predicted_label, unknown_label)

def main():
    """Render the Streamlit UI for classifying a single text or a table.

    In "Single Text Query" mode the entered text is classified when the
    "Classify" button is pressed. In "Upload Table" mode a CSV or Excel file
    is read into a DataFrame, the user picks the text column, and a
    "Predicted Label" column is added and displayed.
    """
    st.title("ItemClass Scope Classifier")

    input_option = st.radio("Select input option", ("Single Text Query", "Upload Table"))

    if input_option == "Single Text Query":
        text_query = st.text_input("Enter text query")
        if st.button("Classify"):
            if text_query:
                predicted_label = classify_review(text_query, model, tokenizer, device)
                st.write("Predicted Label:")
                st.write(predicted_label)
            else:
                st.warning("Please enter a text query.")

    elif input_option == "Upload Table":
        uploaded_file = st.file_uploader("Choose a file", type=["csv", "xlsx"])
        if uploaded_file is None:
            return

        # Compare case-insensitively so that e.g. "DATA.CSV" is not handed to
        # the Excel reader.
        is_csv = uploaded_file.name.lower().endswith(".csv")
        try:
            df = pd.read_csv(uploaded_file) if is_csv else pd.read_excel(uploaded_file)
        except pd.errors.EmptyDataError:
            # Raised by read_csv when the file contains nothing to parse.
            st.warning("The uploaded file is empty.")
            return

        if df.columns.empty:
            # Without columns the selectbox has no option to return and the
            # column lookup below would fail.
            st.warning("The uploaded file has no columns.")
            return

        text_column = st.selectbox("Select the text column", df.columns)
        predicted_labels = [classify_review(text, model, tokenizer, device) for text in df[text_column]]
        df["Predicted Label"] = predicted_labels
        st.write(df)

# Start the UI only when this file is executed as the main script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()