coldn00dl3s committed
Commit 407c110 · verified · Parent: 02a8b39

Upload 18 files

.gitignore ADDED
@@ -0,0 +1,2 @@
+ .env
+ .streamlit
.env ADDED
@@ -0,0 +1,2 @@
+ OPENROUTER_API_KEY = "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"
+ GEMINI_API_KEY = "AIzaSyCd4ZeGpQkiI_eA0iKWCTmFDMCeQVihos4"
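For reference, a minimal sketch of how these .env values could be loaded in Python, assuming the python-dotenv package (an assumption: it is not listed in requirements.txt, and the app itself reads the keys from st.secrets instead):

import os
from dotenv import load_dotenv  # assumed dependency, not in requirements.txt

load_dotenv()  # copies .env entries from the working directory into os.environ
openrouter_key = os.environ["OPENROUTER_API_KEY"]
gemini_key = os.environ["GEMINI_API_KEY"]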
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ images/arch.png filter=lfs diff=lfs merge=lfs -text
+ models/hybrid_lstm_model.keras filter=lfs diff=lfs merge=lfs -text
.streamlit/config.toml ADDED
@@ -0,0 +1,10 @@
+ [theme]
+ base = "light"
+ primaryColor = "#7C3E2E"
+ backgroundColor = "#FAF3E0"
+ secondaryBackgroundColor = "#F5E1C8"
+ textColor = "#3B2F2F"
+ font = "serif"
+
+ [server]
+ runOnSave = true
.streamlit/secrets.toml ADDED
@@ -0,0 +1,2 @@
+ OPENROUTER_API_KEY = "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"
+ GEMINI_API_KEY = "AIzaSyCd4ZeGpQkiI_eA0iKWCTmFDMCeQVihos4"
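Streamlit exposes entries from .streamlit/secrets.toml through the st.secrets mapping, which is how app.py below reads both keys:

import streamlit as st

# st.secrets behaves like a read-only dict backed by .streamlit/secrets.toml
gemini_key = st.secrets["GEMINI_API_KEY"]
openrouter_key = st.secrets["OPENROUTER_API_KEY"]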
app.py ADDED
@@ -0,0 +1,246 @@
+ import re
+ import nltk
+ import torch
+ import numpy as np
+ from collections import Counter
+ from nltk.tokenize import word_tokenize
+ import textstat
+ import json
+ import requests
+ import tensorflow as tf
+ from keras.layers import Layer
+ from transformers import DebertaV2Tokenizer, TFAutoModel
+ import streamlit as st
+ from google import genai
+ # Workaround: clearing torch.classes.__path__ keeps Streamlit's module watcher
+ # from crashing when it introspects torch
+ torch.classes.__path__ = []
+ # Download tokenizer data once
+ nltk.download('punkt', quiet=True)
+
+ # === Cleaning Function ===
+ def clean_response(text: str) -> str:
+     # Simple markdown cleaner: strip formatting characters, collapse whitespace
+     text = re.sub(r"[*_`#>\-\[\]()]", "", text)
+     text = re.sub(r"\s+", " ", text)
+     return text.strip()
+
+ # === Gemini API ===
+ def get_response_from_gemini(prompt: str, key) -> str:
+     gemini_client = genai.Client(api_key=key)
+     response = gemini_client.models.generate_content(
+         model="gemini-2.5-pro-exp-03-25",
+         contents=prompt,
+     )
+     return response.text.strip()
+
+ # === DeepSeek API ===
+ def get_response_from_deepseek(prompt: str, key) -> str:
+     response = requests.post(
+         url="https://openrouter.ai/api/v1/chat/completions",
+         headers={"Authorization": f"Bearer {key}"},
+         data=json.dumps({
+             "model": "deepseek/deepseek-r1:free",
+             "messages": [{"role": "user", "content": prompt}]
+         })
+     )
+     return response.json()["choices"][0]["message"]["content"]
+
+ # === Metrics ===
+ # -999999 serves as a sentinel value whenever a metric cannot be computed.
+ def calculate_entropy(text: str) -> float:
+     try:
+         tokens = [token.lower() for token in word_tokenize(text) if token.isalnum()]
+         if not tokens:
+             return -999999
+         freq_dist = Counter(tokens)
+         total_words = len(tokens)
+         probabilities = [count / total_words for count in freq_dist.values()]
+         return -sum(p * np.log2(p) for p in probabilities)
+     except Exception:
+         return -999999
+
+ def calculate_ttr(text: str) -> float:
+     try:
+         tokens = [token.lower() for token in word_tokenize(text) if token.isalnum()]
+         return len(set(tokens)) / len(tokens) if tokens else -999999
+     except Exception:
+         return -999999
+
+ def get_fk_score(text: str) -> float:
+     try:
+         return textstat.flesch_kincaid_grade(text)
+     except Exception:
+         return -999999
+
+ def get_dc_score(text: str) -> float:
+     try:
+         return textstat.dale_chall_readability_score(text)
+     except Exception:
+         return -999999
+
+ # === Model Setup ===
+ tokenizer = DebertaV2Tokenizer.from_pretrained("microsoft/deberta-v3-base")
+
+ class DebertaEmbeddingLayer(Layer):
+     def __init__(self, **kwargs):
+         super(DebertaEmbeddingLayer, self).__init__(**kwargs)
+         self.deberta = TFAutoModel.from_pretrained("microsoft/deberta-v3-base")
+
+     def call(self, inputs):
+         input_ids, attention_mask = inputs
+         outputs = self.deberta(input_ids, attention_mask=tf.cast(attention_mask, dtype=tf.int32))
+         return outputs.last_hidden_state
+
+     def compute_output_shape(self, input_shape):
+         return (input_shape[0][0], input_shape[0][1], 768)
+
+ model = tf.keras.models.load_model("models/hybrid_lstm_model.keras", custom_objects={"DebertaEmbeddingLayer": DebertaEmbeddingLayer})
+
+ # === Preprocessing ===
+ def preprocess_inputs(prompt: str, response_a: str, response_b: str, tokenizer, max_length=512):
+     combined_text = prompt + " " + response_a + " " + response_b
+     encoded = tokenizer(
+         [combined_text],
+         padding="max_length",
+         truncation=True,
+         max_length=max_length,
+         return_tensors="tf"
+     )
+     metrics = np.array([
+         get_fk_score(response_a),
+         get_fk_score(response_b),
+         get_dc_score(response_a),
+         get_dc_score(response_b),
+         calculate_ttr(response_a),
+         calculate_ttr(response_b),
+         calculate_entropy(response_a),
+         calculate_entropy(response_b)
+     ]).reshape(1, -1).astype(np.float32)
+     return encoded["input_ids"], encoded["attention_mask"], metrics
+
+ # === Streamlit UI ===
+ st.set_page_config(page_title="LMSYS Demo", layout="wide")
+
+ # Optional styling (vintage theme)
+ st.markdown(
+     """
+     <style>
+     * {
+         font-family: 'Georgia', serif !important;
+     }
+     .stButton>button {
+         background-color: #C2B280;
+         color: #3B2F2F;
+         border-radius: 8px;
+         border: 1px solid #7C3E2E;
+     }
+     .stButton>button:hover {
+         background-color: #A67B5B;
+         color: white;
+     }
+     .stTextInput>div>div>input {
+         background-color: #fdf6e3;
+         color: #3B2F2F;
+         border-radius: 4px;
+     }
+     </style>
+     """,
+     unsafe_allow_html=True
+ )
+
+ st.title("Predicting Human Preference: Gemini vs DeepSeek")
+ st.write("This demo pits two SOTA LLMs, [Gemini 2.5 Pro](https://deepmind.google/technologies/gemini/pro/) and [DeepSeek R1](https://api-docs.deepseek.com/news/news250120), against each other on a prompt entered through the sidebar.")
+ st.write("Our proposed hybrid model then predicts which response a human user is more likely to prefer.")
+ st.sidebar.title("Ask a Question!")
+ question = st.sidebar.text_area("Enter your question:", key="prompt_input")
+
+ # Init session state
+ if "generated" not in st.session_state:
+     st.session_state["generated"] = False
+
+ # Generate responses
+ if st.sidebar.button("Generate Responses") and question:
+     with st.spinner("Generating LLM responses..."):
+         raw_a = get_response_from_gemini(question, st.secrets["GEMINI_API_KEY"])
+         raw_b = get_response_from_deepseek(question, st.secrets["OPENROUTER_API_KEY"])
+
+         st.session_state["response_a_raw"] = raw_a
+         st.session_state["response_b_raw"] = raw_b
+         st.session_state["response_a_clean"] = clean_response(raw_a)
+         st.session_state["response_b_clean"] = clean_response(raw_b)
+
+         st.session_state["generated"] = True
+         st.session_state["prediction"] = None
+
+ # Display and interact
+ if st.session_state["generated"]:
+     tab1, tab2, tab3 = st.tabs(["Predictions", "Model Architecture", "📈 Metric Curves"])
+
+     with tab1:
+         st.subheader("Model Responses")
+         col1, col2 = st.columns(2)
+         with col1:
+             st.markdown("#### Gemini")
+             st.markdown(st.session_state["response_a_raw"])
+         with col2:
+             st.markdown("#### DeepSeek")
+             st.markdown(st.session_state["response_b_raw"])
+
+         if st.button("Predict Winner"):
+             with st.spinner("Running model..."):
+                 input_ids, attention_mask, num_features = preprocess_inputs(
+                     question,
+                     st.session_state["response_a_clean"],
+                     st.session_state["response_b_clean"],
+                     tokenizer
+                 )
+                 predictions = model.predict([input_ids, attention_mask, num_features], verbose=0)
+                 predicted_class = np.argmax(predictions, axis=-1)[0]
+                 label_map = {0: "Gemini!", 1: "DeepSeek!", 2: "Tie!"}
+                 st.session_state["prediction"] = label_map[predicted_class]
+
+         if st.session_state.get("prediction"):
+             st.success(f"🤖 Model Prediction: {st.session_state['prediction']}")
+
+     with tab2:
+         st.subheader("Model Architecture")
+         st.image("images/arch.png", caption="Dual-LSTM + Attention + Numerical Features")
+
+     with tab3:
+         st.subheader("Training vs Validation Metrics")
+
+         st.markdown("### RNN")
+         col1, col2 = st.columns(2)
+         with col1:
+             st.image("images/plots/rnn_baseline_acc.png", caption="Accuracy - RNN", use_column_width=True)
+         with col2:
+             st.image("images/plots/rnn_baseline_loss.png", caption="Log Loss - RNN", use_column_width=True)
+
+         st.markdown("### LSTM")
+         col1, col2 = st.columns(2)
+         with col1:
+             st.image("images/plots/lstm_baseline_acc.png", caption="Accuracy - LSTM", use_column_width=True)
+         with col2:
+             st.image("images/plots/lstm_baseline_loss.png", caption="Log Loss - LSTM", use_column_width=True)
+
+         st.markdown("### Bi-LSTM")
+         col1, col2 = st.columns(2)
+         with col1:
+             st.image("images/plots/bilstm_baseline_acc.png", caption="Accuracy - Bi-LSTM", use_column_width=True)
+         with col2:
+             st.image("images/plots/bilstm_baseline_loss.png", caption="Log Loss - Bi-LSTM", use_column_width=True)
+
+         st.markdown("### Hybrid (Dual-LSTM)")
+         col1, col2 = st.columns(2)
+         with col1:
+             st.image("images/plots/duallstm_hybrid_acc.png", caption="Accuracy - Hybrid (Dual-LSTM)", use_column_width=True)
+         with col2:
+             st.image("images/plots/duallstm_hybrid_loss.png", caption="Log Loss - Hybrid (Dual-LSTM)", use_column_width=True)
+
+         st.markdown("### Hybrid (Bi-LSTM)")
+         col1, col2 = st.columns(2)
+         with col1:
+             st.image("images/plots/bilstm_hybrid_acc.png", caption="Accuracy - Hybrid (Bi-LSTM)", use_column_width=True)
+         with col2:
+             st.image("images/plots/bilstm_hybrid_loss.png", caption="Log Loss - Hybrid (Bi-LSTM)", use_column_width=True)
images/arch.png ADDED

Git LFS Details

  • SHA256: f329bd38bd6c973cfd8afac65147333db71a8595383bae773834b07bd03f0aba
  • Pointer size: 131 Bytes
  • Size of remote file: 127 kB
images/plots/bilstm_baseline_acc.png ADDED
images/plots/bilstm_baseline_loss.png ADDED
images/plots/bilstm_hybrid_acc.png ADDED
images/plots/bilstm_hybrid_loss.png ADDED
images/plots/duallstm_hybrid_acc.png ADDED
images/plots/duallstm_hybrid_loss.png ADDED
images/plots/lstm_baseline_acc.png ADDED
images/plots/lstm_baseline_loss.png ADDED
images/plots/rnn_baseline_acc.png ADDED
images/plots/rnn_baseline_loss.png ADDED
models/hybrid_lstm_model.keras ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1fcf934f1f7d7628373fd5b391b017f3bdce61d77cb35c0528cb7df7a073b579
+ size 6238662
requirements.txt ADDED
@@ -0,0 +1,11 @@
+ tensorflow
+ torch
+ sentencepiece
+ tf-keras
+ transformers
+ google-genai
+ nltk
+ textstat
+ streamlit
+ pandas
+ numpy
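To run the demo locally, the usual flow would be: pip install -r requirements.txt, then streamlit run app.py, with the two API keys supplied via .streamlit/secrets.toml.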