Spaces:
Runtime error
Commit · f5f5cd4
Parent(s): eebf495
- __pycache__/main.cpython-310.pyc +0 -0
- app.py +38 -9
- main.py +3 -5
__pycache__/main.cpython-310.pyc
CHANGED
Binary files a/__pycache__/main.cpython-310.pyc and b/__pycache__/main.cpython-310.pyc differ
app.py
CHANGED
@@ -35,30 +35,59 @@ if st.session_state.open_router_key and st.session_state.openai_api_key:
     try:
         response = requests.get("https://openrouter.ai/api/v1/models")
         response.raise_for_status()  # Raise an exception for bad status codes
-
-
+        all_models = response.json()["data"]
         # Sort models alphabetically by their ID
-
+        all_models.sort(key=lambda model: model["id"])
+
+        # --- Create dictionaries for easy model lookup ---
+        models_by_id = {model["id"]: model for model in all_models}
+        judge_models = [model["id"] for model in all_models if "gpt" in model["id"]]
+        judge_models.sort()
 
-        model_names =
-        judge_models = [model["id"] for model in models if "gpt" in model["id"]]  # Example criteria
+        model_names = list(models_by_id.keys())
     except requests.exceptions.RequestException as e:
         st.error(f"Error fetching models from OpenRouter API: {e}")
         model_names = []  # Provide an empty list if API call fails
+        judge_models = []
 
     # Model Selection
     if model_names:
-        model_name = st.selectbox("Select a
+        model_name = st.selectbox("Select a Contestant Model", model_names)
+        # --- Display pricing for the selected model ---
+        selected_model = models_by_id.get(model_name)
+        if selected_model:
+            pricing_info = selected_model.get('pricing', {})
+            prompt_price = float(pricing_info.get("prompt", 0)) * 1000000
+            completion_price = float(pricing_info.get("completion", 0)) * 1000000
+
+            # Display pricing information with increased precision
+            st.write(f"**Prompt Pricing:** ${prompt_price:.2f}/Million tokens (if applicable)")
+            st.write(f"**Completion Pricing:** ${completion_price:.2f}/Million tokens")
+        else:
+            st.write("**Pricing:** N/A")
     else:
         st.error("No models available. Please check your API connection.")
-        st.stop()
+        st.stop()
 
     # Judge Model Selection
     if judge_models:
         judge_model_name = st.selectbox("Select a Judge Model", judge_models)
+        # --- Display pricing for the selected judge model ---
+        selected_judge_model = models_by_id.get(judge_model_name)
+        if selected_judge_model:
+            pricing_info = selected_judge_model.get('pricing', {})
+            prompt_price = float(pricing_info.get("prompt", 0)) * 1000000
+            completion_price = float(pricing_info.get("completion", 0)) * 1000000
+
+            # Display pricing information with increased precision
+            st.write(f"**Prompt Pricing:** ${prompt_price:.2f}/Million tokens (if applicable)")
+            st.write(f"**Completion Pricing:** ${completion_price:.2f}/Million tokens")
+        else:
+            st.write("**Pricing:** N/A")
     else:
         st.error("No judge models available. Please check your API connection.")
-        st.stop()
+        st.stop()
+
 
     # Initialize session state for user_questions and predefined_questions
     if "user_questions" not in st.session_state:
@@ -138,7 +167,7 @@ if st.session_state.open_router_key and st.session_state.openai_api_key:
                     "Question": result["question"],
                     "Answer": answer,
                     "Contestant Model": model_name,
-                    "Judge Model":
+                    "Judge Model": judge_model_name,
                     "Coherence Score": result["coherence_score"],
                     "Novelty Score": result["novelty_score"]
                 })
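The pricing display added to app.py treats each `pricing` entry returned by OpenRouter's `/api/v1/models` endpoint as a per-token USD price stored as a string, so multiplying by 1,000,000 yields a price per million tokens. Below is a minimal standalone sketch of that conversion; the `sample_model` payload, its id, and its prices are made up for illustration and are not taken from the commit.

# Sketch only: sample_model mimics a single entry from OpenRouter's
# /api/v1/models response; the id and prices here are illustrative, not real data.
sample_model = {
    "id": "openai/gpt-4o",
    "pricing": {"prompt": "0.0000025", "completion": "0.00001"},  # USD per token, as strings
}

def price_per_million(model: dict, field: str) -> float:
    # Convert a per-token price string into USD per million tokens,
    # defaulting to 0 when the field is missing (mirrors app.py's .get fallback).
    pricing = model.get("pricing", {})
    return float(pricing.get(field, 0)) * 1_000_000

print(f"Prompt:     ${price_per_million(sample_model, 'prompt'):.2f} / million tokens")
print(f"Completion: ${price_per_million(sample_model, 'completion'):.2f} / million tokens")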
main.py
CHANGED
@@ -51,12 +51,12 @@ def process_question(question, model_name, open_router_key, openai_api_key, resu
         if coherence_score is None:
             break
 
-        if coherence_score <=
+        if coherence_score <= 6:
             break
 
         novelty_score = get_novelty_score(new_answer, previous_answers, openai_api_key)
 
-        if novelty_score < 0.
+        if novelty_score < 0.5:
             break
 
 
@@ -158,9 +158,7 @@ def benchmark_model_multithreaded(model_name, questions, open_router_key, openai
             st.write(f"**Novelty Score:** {result['novelty_score']}")
             results.extend(result["results"])  # Add results here
             novelty_score += result["novelty_score"]  # Update novelty score
-            st.
-                f"<span style='color:yellow'>Total novelty score across all questions (so far): {novelty_score}</span>",
-                unsafe_allow_html=True)
+            st.warning(f"Total novelty score across all questions (so far): {novelty_score}")
 
         elif result["type"] == "summary":
             st.write(f"<span style='color:blue'>Total novelty score for question '{result['question']}': {result['total_novelty']}</span>",