from openai import OpenAI

from config import openai_api

client = OpenAI(api_key=openai_api)


def eval_answer(answer_reference, answer_to_score):
    """Score a candidate answer against a reference answer using an LLM judge.

    Returns the model's raw JSON string, e.g.
    {"score": 4, "rationale_based_on_scoring_rules": "..."}.
    """
    system_prompt = f"""Your task is to evaluate how well a given answer matches the following expected output (all claims in the answer should be backed by sources and references):
====================
EXPECTED OUTPUT
{answer_reference}
=====================
You output only a score between 1 and 5, using the following scale (citing the sources the information was drawn from is a critical expected element):
1: off topic; the answer does not make sense
2: misleading or false answer
3: the answer makes sense, but some expected elements or sources are missing
4: very good answer backed by all valid sources; all key elements are present, but it could be clearer
5: perfect answer; nothing else was expected
You output the score in the following JSON format:
{{"score": X, "rationale_based_on_scoring_rules": "XXX"}}
"""
    user_prompt = f"""Given answer:
{answer_to_score}
"""
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        temperature=0,  # deterministic scoring
        max_tokens=256,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
        response_format={"type": "json_object"},  # force the model to emit valid JSON
        stream=False,
    )
    return response.choices[0].message.content
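

# A minimal usage sketch. The reference and candidate strings below are
# hypothetical placeholders, not part of the original script; json is used
# only to parse the judge's JSON string into a dict.
if __name__ == "__main__":
    import json

    reference = "Paris is the capital of France (source: CIA World Factbook)."
    candidate = "The capital of France is Paris."

    raw = eval_answer(reference, candidate)
    result = json.loads(raw)  # e.g. {"score": 3, "rationale_based_on_scoring_rules": "..."}
    print(result["score"], "-", result["rationale_based_on_scoring_rules"])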