Presidentlin committed
Commit f5f5cd4 · 1 Parent(s): eebf495
Files changed (3)
  1. __pycache__/main.cpython-310.pyc +0 -0
  2. app.py +38 -9
  3. main.py +3 -5
__pycache__/main.cpython-310.pyc CHANGED
Binary files a/__pycache__/main.cpython-310.pyc and b/__pycache__/main.cpython-310.pyc differ
 
app.py CHANGED
@@ -35,30 +35,59 @@ if st.session_state.open_router_key and st.session_state.openai_api_key:
     try:
         response = requests.get("https://openrouter.ai/api/v1/models")
         response.raise_for_status()  # Raise an exception for bad status codes
-        models = response.json()["data"]
-
+        all_models = response.json()["data"]
         # Sort models alphabetically by their ID
-        models.sort(key=lambda model: model["id"])
+        all_models.sort(key=lambda model: model["id"])
+
+        # --- Create dictionaries for easy model lookup ---
+        models_by_id = {model["id"]: model for model in all_models}
+        judge_models = [model["id"] for model in all_models if "gpt" in model["id"]]
+        judge_models.sort()
 
-        model_names = [model["id"] for model in models]
-        judge_models = [model["id"] for model in models if "gpt" in model["id"]]  # Example criteria
+        model_names = list(models_by_id.keys())
     except requests.exceptions.RequestException as e:
         st.error(f"Error fetching models from OpenRouter API: {e}")
         model_names = []  # Provide an empty list if API call fails
+        judge_models = []
 
     # Model Selection
     if model_names:
-        model_name = st.selectbox("Select a Language Model", model_names)
+        model_name = st.selectbox("Select a Contestant Model", model_names)
+        # --- Display pricing for the selected model ---
+        selected_model = models_by_id.get(model_name)
+        if selected_model:
+            pricing_info = selected_model.get('pricing', {})
+            prompt_price = float(pricing_info.get("prompt", 0)) * 1000000
+            completion_price = float(pricing_info.get("completion", 0)) * 1000000
+
+            # Display pricing information with increased precision
+            st.write(f"**Prompt Pricing:** ${prompt_price:.2f}/Million tokens (if applicable)")
+            st.write(f"**Completion Pricing:** ${completion_price:.2f}/Million tokens")
+        else:
+            st.write("**Pricing:** N/A")
     else:
         st.error("No models available. Please check your API connection.")
-        st.stop()  # Stop execution if no models are available
+        st.stop()
 
     # Judge Model Selection
     if judge_models:
         judge_model_name = st.selectbox("Select a Judge Model", judge_models)
+        # --- Display pricing for the selected judge model ---
+        selected_judge_model = models_by_id.get(judge_model_name)
+        if selected_judge_model:
+            pricing_info = selected_judge_model.get('pricing', {})
+            prompt_price = float(pricing_info.get("prompt", 0)) * 1000000
+            completion_price = float(pricing_info.get("completion", 0)) * 1000000
+
+            # Display pricing information with increased precision
+            st.write(f"**Prompt Pricing:** ${prompt_price:.2f}/Million tokens (if applicable)")
+            st.write(f"**Completion Pricing:** ${completion_price:.2f}/Million tokens")
+        else:
+            st.write("**Pricing:** N/A")
     else:
         st.error("No judge models available. Please check your API connection.")
-        st.stop()  # Stop execution if no judge models are available
+        st.stop()
+
 
     # Initialize session state for user_questions and predefined_questions
     if "user_questions" not in st.session_state:
@@ -138,7 +167,7 @@ if st.session_state.open_router_key and st.session_state.openai_api_key:
                 "Question": result["question"],
                 "Answer": answer,
                 "Contestant Model": model_name,
-                "Judge Model": 'openai/gpt-4o-mini',
+                "Judge Model": judge_model_name,
                 "Coherence Score": result["coherence_score"],
                 "Novelty Score": result["novelty_score"]
             })
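The new pricing display above assumes that the pricing.prompt and pricing.completion fields returned by OpenRouter's /api/v1/models endpoint are per-token USD amounts encoded as strings, which is why they are converted with float() and multiplied by 1,000,000 before being shown as a per-million-token price. A minimal, runnable sketch of that conversion, with hypothetical example values:

    # Sketch of the per-million-token conversion used in the diff above.
    # Assumes per-token USD prices arrive as strings; the values here are
    # made up for illustration, not taken from the live API.
    def per_million(pricing: dict, key: str) -> float:
        return float(pricing.get(key, 0)) * 1_000_000

    example_pricing = {"prompt": "0.0000025", "completion": "0.00001"}
    print(f"Prompt: ${per_million(example_pricing, 'prompt'):.2f}/Million tokens")         # $2.50/Million tokens
    print(f"Completion: ${per_million(example_pricing, 'completion'):.2f}/Million tokens")  # $10.00/Million tokens

One consequence of the :.2f format is that prices below half a cent per million tokens will display as $0.00, so wider precision may be needed for the cheapest models.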
main.py CHANGED
@@ -51,12 +51,12 @@ def process_question(question, model_name, open_router_key, openai_api_key, resu
         if coherence_score is None:
             break
 
-        if coherence_score <= 3:
+        if coherence_score <= 6:
             break
 
         novelty_score = get_novelty_score(new_answer, previous_answers, openai_api_key)
 
-        if novelty_score < 0.1:
+        if novelty_score < 0.5:
             break
 
 
@@ -158,9 +158,7 @@ def benchmark_model_multithreaded(model_name, questions, open_router_key, openai
             st.write(f"**Novelty Score:** {result['novelty_score']}")
             results.extend(result["results"])  # Add results here
             novelty_score += result["novelty_score"]  # Update novelty score
-            st.write(
-                f"<span style='color:yellow'>Total novelty score across all questions (so far): {novelty_score}</span>",
-                unsafe_allow_html=True)
+            st.warning(f"Total novelty score across all questions (so far): {novelty_score}")
 
         elif result["type"] == "summary":
             st.write(f"<span style='color:blue'>Total novelty score for question '{result['question']}': {result['total_novelty']}</span>",
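The main.py changes raise both early-exit thresholds in process_question: an answer must now score above 6 on coherence (previously above 3) and at least 0.5 on novelty (previously 0.1) for generation to continue, and the running novelty total is rendered with st.warning instead of a hand-rolled yellow span, so unsafe_allow_html is no longer needed on that line. A small, runnable sketch of the tightened stop logic, with everything except the thresholds being illustrative (in main.py the scores come from the judge model):

    # Sketch of the stricter stop conditions from the hunk above.
    # In main.py the checks run inside the answer loop and novelty is only
    # scored after coherence passes; here both scores are passed in directly.
    def should_stop(coherence_score, novelty_score):
        if coherence_score is None or coherence_score <= 6:  # previously <= 3
            return True
        if novelty_score < 0.5:                              # previously < 0.1
            return True
        return False

    print(should_stop(7, 0.6))  # False: coherent and novel enough to keep generating
    print(should_stop(6, 0.9))  # True: coherence at or below the new cut-off
    print(should_stop(8, 0.4))  # True: novelty below the new cut-off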