Spaces:
Sleeping
Sleeping
File size: 4,019 Bytes
975a927 9ff00d4 975a927 bdf2140 975a927 bdf2140 975a927 bdf2140 975a927 bdf2140 975a927 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 |
import os
import random
import datetime as dt
import streamlit as st
from streamlit.logger import get_logger
from langchain.schema.messages import HumanMessage
from utils.mongo_utils import get_db_client, new_battle_result, get_non_assesed_comparison, new_completion_error
from app_config import ISSUES, SOURCES
logger = get_logger(__name__)
openai_api_key = os.environ['OPENAI_API_KEY']
if 'db_client' not in st.session_state:
st.session_state["db_client"] = get_db_client()
def disable_buttons():
return len(comparison) == 0
def replaceA():
new_battle_result(st.session_state['db_client'],
st.session_state['comparison_id'],
st.session_state['convo_id'],
username, sourceA, sourceB, winner='model_two'
)
def replaceB():
new_battle_result(st.session_state['db_client'],
st.session_state['comparison_id'],
st.session_state['convo_id'],
username, sourceA, sourceB, winner='model_one'
)
def regenerateBoth():
new_battle_result(st.session_state['db_client'],
st.session_state['comparison_id'],
st.session_state['convo_id'],
username, sourceA, sourceB, winner='both_bad'
)
def bothGood():
new_battle_result(st.session_state['db_client'],
st.session_state['comparison_id'],
st.session_state['convo_id'],
username, sourceA, sourceB, winner='tie'
)
def error2db(model):
logger.info(f"error logged for {model}")
new_completion_error(st.session_state['db_client'],
st.session_state['comparison_id'],
username, model
)
def error2dbA():
error2db(sourceA)
def error2dbB():
error2db(sourceB)
with st.sidebar:
username = st.text_input("Username", value='ivnban-ctl', max_chars=30)
comparison = get_non_assesed_comparison(st.session_state["db_client"], username)
with st.sidebar:
sbcol1, sbcol2 = st.columns(2)
beta = sbcol1.button("A is better", on_click=replaceB, disabled=disable_buttons())
betb = sbcol2.button("B is better", on_click=replaceA, disabled=disable_buttons())
same = sbcol1.button("Tie", on_click=bothGood, disabled=disable_buttons())
bbad = sbcol2.button("Both are bad", on_click=regenerateBoth, disabled=disable_buttons())
errorA = sbcol1.button("Error in A", on_click=error2dbA, disabled=disable_buttons())
errorB = sbcol2.button("Error in B", on_click=error2dbB, disabled=disable_buttons())
if len(comparison) > 0:
st.session_state['comparison_id'] = comparison[0]["_id"]
st.session_state['convo_id'] = comparison[0]["convo_id"]
st.session_state["disabled_buttons"] = False
st.sidebar.text_input("Issue", value=comparison[0]['convo_info'][0]['issue'], disabled=True)
st.title(f"💬 History")
for msg in comparison[0]['chat_history'].split("\n"):
parts = msg.split(":")
if len(parts) > 1:
role = "user" if parts[0] == 'helper' else "assistant"
st.chat_message(role).write(parts[1])
col1, col2 = st.columns(2)
col1.title(f"💬 Simulator A")
col2.title(f"💬 Simulator B")
selectedA = random.choice(['model_one', 'model_two'])
selectedB = "model_two" if selectedA == "model_one" else "model_one"
sourceA = comparison[0]['convo_info'][0][selectedA]
sourceB = comparison[0]['convo_info'][0][selectedB]
logger.info(f"selected A is {sourceA} and B is {sourceB}")
col1.chat_message("user").write(comparison[0]["prompt"])
col2.chat_message("user").write(comparison[0]["prompt"])
col1.chat_message("assistant").write(comparison[0][f"compeltion_{selectedA}"])
col2.chat_message("assistant").write(comparison[0][f"compeltion_{selectedB}"])
else:
st.write("No Comparisons left to Check") |