Spaces:

coldn00dl3s
/

llm-human-prediction-demo

Sleeping

App Files Files Community

coldn00dl3s committed 19 days ago

Commit

407c110

verified ·

1 Parent(s): 02a8b39

Upload 18 files

Browse files

Files changed (19) hide show

.GITIGNORE +2 -0
.env +2 -0
.gitattributes +2 -0
.streamlit/config.toml +10 -0
.streamlit/secrets.toml +2 -0
app.py +246 -0
images/arch.png +3 -0
images/plots/bilstm_baseline_acc.png +0 -0
images/plots/bilstm_baseline_loss.png +0 -0
images/plots/bilstm_hybrid_acc.png +0 -0
images/plots/bilstm_hybrid_loss.png +0 -0
images/plots/duallstm_hybrid_acc.png +0 -0
images/plots/duallstm_hybrid_loss.png +0 -0
images/plots/lstm_baseline_acc.png +0 -0
images/plots/lstm_baseline_loss.png +0 -0
images/plots/rnn_baseline_acc.png +0 -0
images/plots/rnn_baseline_loss.png +0 -0
models/hybrid_lstm_model.keras +3 -0
requirements.txt +11 -0

.GITIGNORE ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ .env
2	+ .streamlit

.env ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ OPENROUTER_API_KEY = "<REDACTED>"
2	+ GEMINI_API_KEY = "<REDACTED>"

.gitattributes CHANGED Viewed

@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+images/arch.png filter=lfs diff=lfs merge=lfs -text
+models/hybrid_lstm_model.keras filter=lfs diff=lfs merge=lfs -text

.streamlit/config.toml ADDED Viewed

	@@ -0,0 +1,10 @@

+[theme]
+base = "light"
+primaryColor = "#7C3E2E"
+backgroundColor = "#FAF3E0"
+secondaryBackgroundColor = "#F5E1C8"
+textColor = "#3B2F2F"
+font = "serif"
+[server]
+runOnSave = true

.streamlit/secrets.toml ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ OPENROUTER_API_KEY = "<REDACTED>"
2	+ GEMINI_API_KEY = "<REDACTED>"

app.py ADDED Viewed

	@@ -0,0 +1,246 @@

+import re
+import nltk
+import torch
+import numpy as np
+from collections import Counter
+from nltk.tokenize import word_tokenize
+import textstat
+import json
+import requests
+import tensorflow as tf
+from keras.layers import Layer
+from transformers import DebertaV2Tokenizer, TFAutoModel
+import streamlit as st
+from google import genai
+torch.classes.__path__ = []
+# Download tokenizer data once
+nltk.download('punkt', quiet=True)
+# === Cleaning Function ===
+def clean_response(text: str) -> str:
+    # Simple markdown cleaner
+    text = re.sub(r"[*_`#>\-\[\]()]", "", text)
+    text = re.sub(r"\s+", " ", text)
+    return text.strip()
+# === Gemini API ===
+def get_response_from_gemini(prompt: str, key) -> str:
+    gemini_client = genai.Client(api_key=key)
+    response = gemini_client.models.generate_content(
+        model="gemini-2.5-pro-exp-03-25",
+        contents=prompt,
+    )
+    return response.text.strip()
+# === DeepSeek API ===
+def get_response_from_deepseek(prompt: str, key) -> str:
+    response = requests.post(
+        url="https://openrouter.ai/api/v1/chat/completions",
+        headers={"Authorization": f"Bearer {key}"},
+        data=json.dumps({
+            "model": "deepseek/deepseek-r1:free",
+            "messages": [{"role": "user", "content": prompt}]
+        })
+    )
+    return response.json()["choices"][0]["message"]["content"]
+# === Metrics ===
+def calculate_entropy(text: str) -> float:
+    try:
+        tokens = [token.lower() for token in word_tokenize(text) if token.isalnum()]
+        if not tokens:
+            return -999999
+        freq_dist = Counter(tokens)
+        total_words = len(tokens)
+        probabilities = [count / total_words for count in freq_dist.values()]
+        return -sum(p * np.log2(p) for p in probabilities)
+    except:
+        return -999999
+def calculate_ttr(text: str) -> float:
+    try:
+        tokens = [token.lower() for token in word_tokenize(text) if token.isalnum()]
+        return len(set(tokens)) / len(tokens) if tokens else -999999
+    except:
+        return -999999
+def get_fk_score(text: str) -> float:
+    try:
+        return textstat.flesch_kincaid_grade(text)
+    except:
+        return -999999
+def get_dc_score(text: str) -> float:
+    try:
+        return textstat.dale_chall_readability_score(text)
+    except:
+        return -999999
+# === Model Setup ===
+tokenizer = DebertaV2Tokenizer.from_pretrained("microsoft/deberta-v3-base")
+class DebertaEmbeddingLayer(Layer):
+    def __init__(self, **kwargs):
+        super(DebertaEmbeddingLayer, self).__init__(**kwargs)
+        self.deberta = TFAutoModel.from_pretrained("microsoft/deberta-v3-base")
+    def call(self, inputs):
+        input_ids, attention_mask = inputs
+        outputs = self.deberta(input_ids, attention_mask=tf.cast(attention_mask, dtype=tf.int32))
+        return outputs.last_hidden_state
+    def compute_output_shape(self, input_shape):
+        return (input_shape[0][0], input_shape[0][1], 768)
+model = tf.keras.models.load_model("models/hybrid_lstm_model.keras", custom_objects={"DebertaEmbeddingLayer": DebertaEmbeddingLayer})
+# === Preprocessing ===
+def preprocess_inputs(prompt: str, response_a: str, response_b: str, tokenizer, max_length=512):
+    combined_text = prompt + " " + response_a + " " + response_b
+    encoded = tokenizer(
+        [combined_text],
+        padding="max_length",
+        truncation=True,
+        max_length=max_length,
+        return_tensors="tf"
+    )
+    metrics = np.array([
+        get_fk_score(response_a),
+        get_fk_score(response_b),
+        get_dc_score(response_a),
+        get_dc_score(response_b),
+        calculate_ttr(response_a),
+        calculate_ttr(response_b),
+        calculate_entropy(response_a),
+        calculate_entropy(response_b)
+    ]).reshape(1, -1).astype(np.float32)
+    return encoded["input_ids"], encoded["attention_mask"], metrics
+# === Streamlit UI ===
+st.set_page_config(page_title="LMSYS Demo", layout="wide")
+# Optional styling (vintage theme)
+st.markdown(
+    """
+    <style>
+    * {
+        font-family: 'Georgia', serif !important;
+    }
+    .stButton>button {
+        background-color: #C2B280;
+        color: #3B2F2F;
+        border-radius: 8px;
+        border: 1px solid #7C3E2E;
+    }
+    .stButton>button:hover {
+        background-color: #A67B5B;
+        color: white;
+    }
+    .stTextInput>div>div>input {
+        background-color: #fdf6e3;
+        color: #3B2F2F;
+        border-radius: 4px;
+    }
+    </style>
+    """,
+    unsafe_allow_html=True
+)
+st.title("Predicting Human Preference : Gemini vs DeepSeek")
+st.write("As part of this demo, we make use of two SOTA LLMs : [Gemini 2.5 Pro](https://deepmind.google/technologies/gemini/pro/) and [DeepSeek R1](https://api-docs.deepseek.com/news/news250120) and make them compete against each other on a given prompt (to be entered through the sidebar)")
+st.write("Using our proposed hybrid model, we predict which response is more suited to be preferred by a human user.")
+st.sidebar.title("Ask a Question!")
+question = st.sidebar.text_area("Enter your question:", key="prompt_input")
+# Init session state
+if "generated" not in st.session_state:
+    st.session_state["generated"] = False
+# Generate responses
+if st.sidebar.button("Generate Responses") and question:
+    with st.spinner("Generating LLM responses..."):
+        raw_a = get_response_from_gemini(question, st.secrets["GEMINI_API_KEY"])
+        raw_b = get_response_from_deepseek(question, st.secrets["OPENROUTER_API_KEY"])
+        st.session_state["response_a_raw"] = raw_a
+        st.session_state["response_b_raw"] = raw_b
+        st.session_state["response_a_clean"] = clean_response(raw_a)
+        st.session_state["response_b_clean"] = clean_response(raw_b)
+        st.session_state["generated"] = True
+        st.session_state["prediction"] = None
+# Display and interact
+if st.session_state["generated"]:
+    tab1, tab2, tab3 = st.tabs(["Predictions","Model Architecture", "📈 Metric Curves"])
+    with tab1:
+        st.subheader("Model Responses")
+        col1, col2 = st.columns(2)
+        with col1:
+            st.markdown("#### Gemini")
+            st.markdown(st.session_state["response_a_raw"])
+        with col2:
+            st.markdown("#### DeepSeek")
+            st.markdown(st.session_state["response_b_raw"])
+        if st.button("Predict Winner"):
+            with st.spinner("Running model..."):
+                input_ids, attention_mask, num_features = preprocess_inputs(
+                    question,
+                    st.session_state["response_a_clean"],
+                    st.session_state["response_b_clean"],
+                    tokenizer
+                )
+                predictions = model.predict([input_ids, attention_mask, num_features], verbose=0)
+                predicted_class = np.argmax(predictions, axis=-1)[0]
+                label_map = {0: "Gemini!", 1: "DeepSeek!", 2: "Tie!"}
+                st.session_state["prediction"] = label_map[predicted_class]
+        if st.session_state.get("prediction"):
+            st.success(f"🤖 Model Prediction: {st.session_state['prediction']}")
+    with tab2:
+        st.subheader("Model Architecture")
+        st.image("images/arch.png", caption="Dual-LSTM + Attention + Numerical Features")
+    with tab3:
+        st.subheader("Training vs Validation Metrics")
+        st.markdown("### RNN")
+        col1, col2 = st.columns(2)
+        with col1:
+            st.image("images/plots/rnn_baseline_acc.png", caption="Accuracy - RNN", use_column_width=True)
+        with col2:
+            st.image("images/plots/rnn_baseline_loss.png", caption="Log Loss - RNN", use_column_width=True)
+        st.markdown("### LSTM")
+        col1, col2 = st.columns(2)
+        with col1:
+            st.image("images/plots/lstm_baseline_acc.png", caption="Accuracy - LSTM", use_column_width=True)
+        with col2:
+            st.image("images/plots/lstm_baseline_loss.png", caption="Log Loss - LSTM", use_column_width=True)
+        st.markdown("### Bi-LSTM")
+        col1, col2 = st.columns(2)
+        with col1:
+            st.image("images/plots/bilstm_baseline_acc.png", caption="Accuracy - Bi-LSTM", use_column_width=True)
+        with col2:
+            st.image("images/plots/bilstm_baseline_loss.png", caption="Log Loss - Bi-LSTM", use_column_width=True)
+        st.markdown("### Hybrid (Dual-LSTM)")
+        col1, col2 = st.columns(2)
+        with col1:
+            st.image("images/plots/duallstm_hybrid_acc.png", caption="Accuracy - Hybrid (Dual-LSTM)", use_column_width=True)
+        with col2:
+            st.image("images/plots/duallstm_hybrid_loss.png", caption="Log Loss - Hybrid (Dual-LSTM)", use_column_width=True)
+        st.markdown("### Hybrid (Bi-LSTM)")
+        col1, col2 = st.columns(2)
+        with col1:
+            st.image("images/plots/bilstm_hybrid_acc.png", caption="Accuracy - Hybrid (Bi-LSTM)", use_column_width=True)
+        with col2:
+            st.image("images/plots/bilstm_hybrid_loss.png", caption="Log Loss - Hybrid (Bi-LSTM)", use_column_width=True)