Spaces:
Runtime error
Commit · f5f5cd4
Parent(s): eebf495
- __pycache__/main.cpython-310.pyc +0 -0
- app.py +38 -9
- main.py +3 -5
__pycache__/main.cpython-310.pyc
CHANGED
Binary files a/__pycache__/main.cpython-310.pyc and b/__pycache__/main.cpython-310.pyc differ
app.py
CHANGED
@@ -35,30 +35,59 @@ if st.session_state.open_router_key and st.session_state.openai_api_key:
     try:
         response = requests.get("https://openrouter.ai/api/v1/models")
         response.raise_for_status()  # Raise an exception for bad status codes
-
-
+        all_models = response.json()["data"]
         # Sort models alphabetically by their ID
-
+        all_models.sort(key=lambda model: model["id"])
+
+        # --- Create dictionaries for easy model lookup ---
+        models_by_id = {model["id"]: model for model in all_models}
+        judge_models = [model["id"] for model in all_models if "gpt" in model["id"]]
+        judge_models.sort()
 
-        model_names =
-        judge_models = [model["id"] for model in models if "gpt" in model["id"]]  # Example criteria
+        model_names = list(models_by_id.keys())
     except requests.exceptions.RequestException as e:
         st.error(f"Error fetching models from OpenRouter API: {e}")
         model_names = []  # Provide an empty list if API call fails
+        judge_models = []
 
     # Model Selection
     if model_names:
-        model_name = st.selectbox("Select a
+        model_name = st.selectbox("Select a Contestant Model", model_names)
+        # --- Display pricing for the selected model ---
+        selected_model = models_by_id.get(model_name)
+        if selected_model:
+            pricing_info = selected_model.get('pricing', {})
+            prompt_price = float(pricing_info.get("prompt", 0)) * 1000000
+            completion_price = float(pricing_info.get("completion", 0)) * 1000000
+
+            # Display pricing information with increased precision
+            st.write(f"**Prompt Pricing:** ${prompt_price:.2f}/Million tokens (if applicable)")
+            st.write(f"**Completion Pricing:** ${completion_price:.2f}/Million tokens")
+        else:
+            st.write("**Pricing:** N/A")
     else:
         st.error("No models available. Please check your API connection.")
-        st.stop()
+        st.stop()
 
     # Judge Model Selection
     if judge_models:
         judge_model_name = st.selectbox("Select a Judge Model", judge_models)
+        # --- Display pricing for the selected judge model ---
+        selected_judge_model = models_by_id.get(judge_model_name)
+        if selected_judge_model:
+            pricing_info = selected_judge_model.get('pricing', {})
+            prompt_price = float(pricing_info.get("prompt", 0)) * 1000000
+            completion_price = float(pricing_info.get("completion", 0)) * 1000000
+
+            # Display pricing information with increased precision
+            st.write(f"**Prompt Pricing:** ${prompt_price:.2f}/Million tokens (if applicable)")
+            st.write(f"**Completion Pricing:** ${completion_price:.2f}/Million tokens")
+        else:
+            st.write("**Pricing:** N/A")
     else:
         st.error("No judge models available. Please check your API connection.")
-        st.stop()
+        st.stop()
+
 
     # Initialize session state for user_questions and predefined_questions
     if "user_questions" not in st.session_state:
@@ -138,7 +167,7 @@ if st.session_state.open_router_key and st.session_state.openai_api_key:
                     "Question": result["question"],
                     "Answer": answer,
                     "Contestant Model": model_name,
-                    "Judge Model":
+                    "Judge Model": judge_model_name,
                     "Coherence Score": result["coherence_score"],
                     "Novelty Score": result["novelty_score"]
                 })
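The pricing display added to app.py treats each `pricing` entry returned by OpenRouter's `/api/v1/models` endpoint as a per-token USD price stored as a string, so multiplying by 1,000,000 yields a price per million tokens. Below is a minimal standalone sketch of that conversion; the `sample_model` payload, its id, and its prices are made up for illustration and are not taken from the commit.

# Sketch only: sample_model mimics a single entry from OpenRouter's
# /api/v1/models response; the id and prices here are illustrative, not real data.
sample_model = {
    "id": "openai/gpt-4o",
    "pricing": {"prompt": "0.0000025", "completion": "0.00001"},  # USD per token, as strings
}

def price_per_million(model: dict, field: str) -> float:
    # Convert a per-token price string into USD per million tokens,
    # defaulting to 0 when the field is missing (mirrors app.py's .get fallback).
    pricing = model.get("pricing", {})
    return float(pricing.get(field, 0)) * 1_000_000

print(f"Prompt:     ${price_per_million(sample_model, 'prompt'):.2f} / million tokens")
print(f"Completion: ${price_per_million(sample_model, 'completion'):.2f} / million tokens")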
main.py
CHANGED
@@ -51,12 +51,12 @@ def process_question(question, model_name, open_router_key, openai_api_key, resu
         if coherence_score is None:
             break
 
-        if coherence_score <=
+        if coherence_score <= 6:
             break
 
         novelty_score = get_novelty_score(new_answer, previous_answers, openai_api_key)
 
-        if novelty_score < 0.
+        if novelty_score < 0.5:
             break
 
 
@@ -158,9 +158,7 @@ def benchmark_model_multithreaded(model_name, questions, open_router_key, openai
             st.write(f"**Novelty Score:** {result['novelty_score']}")
             results.extend(result["results"])  # Add results here
             novelty_score += result["novelty_score"]  # Update novelty score
-            st.
-                f"<span style='color:yellow'>Total novelty score across all questions (so far): {novelty_score}</span>",
-                unsafe_allow_html=True)
+            st.warning(f"Total novelty score across all questions (so far): {novelty_score}")
 
         elif result["type"] == "summary":
             st.write(f"<span style='color:blue'>Total novelty score for question '{result['question']}': {result['total_novelty']}</span>",