Kolumbus Lindh committed on
Commit
47aec4f
·
1 Parent(s): ca0c241
Files changed (1) hide show
  1. app.py +26 -19
app.py CHANGED
@@ -11,13 +11,13 @@ def load_user_model(repo_id, model_file):
11
 
12
  # Generate a response using the specified model and prompt
13
  def generate_response(model, prompt):
14
- response = model(prompt, max_tokens=512, temperature=0.5)
15
  return response["choices"][0]["text"]
16
 
17
  # Evaluate responses using the LoRA evaluation model
18
  def evaluate_responses(prompt, repo_a, model_a, repo_b, model_b, evaluation_criteria):
19
  if len(evaluation_criteria) > 3:
20
- return "Error: Please select up to 3 evaluation criteria only."
21
 
22
  # Load models
23
  model_a_instance = load_user_model(repo_a, model_a)
@@ -47,17 +47,12 @@ Please evaluate the responses based on the selected criteria. For each criterion
47
  evaluation_response = lora_model.create_completion(
48
  prompt=evaluation_prompt,
49
  max_tokens=512,
50
- temperature=0.5
 
51
  )
52
  evaluation_results = evaluation_response["choices"][0]["text"]
53
 
54
- # Combine results for display
55
- final_output = f"""
56
- Response A:\n{response_a}\n\n
57
- Response B:\n{response_b}\n\n
58
- Evaluation Results:\n{evaluation_results}
59
- """
60
- return final_output
61
 
62
  # Load the LoRA evaluation model
63
  def load_lora_model():
@@ -73,26 +68,38 @@ print("LoRA evaluation model loaded successfully!")
73
 
74
  # Gradio interface
75
  with gr.Blocks(title="LLM as a Judge") as demo:
76
- gr.Markdown("## LLM as a Judge 🧐")
77
 
78
  # Model inputs
79
- repo_a_input = gr.Textbox(label="Model A Repository", placeholder="Enter the Hugging Face repo name for Model A...")
80
- model_a_input = gr.Textbox(label="Model A File Name", placeholder="Enter the model filename for Model A...")
81
- repo_b_input = gr.Textbox(label="Model B Repository", placeholder="Enter the Hugging Face repo name for Model B...")
82
- model_b_input = gr.Textbox(label="Model B File Name", placeholder="Enter the model filename for Model B...")
83
 
84
  # Prompt and criteria inputs
85
  prompt_input = gr.Textbox(label="Enter Prompt", placeholder="Enter the prompt here...", lines=3)
86
  criteria_dropdown = gr.CheckboxGroup(
87
- label="Select Up to 3 Evaluation Criteria",
88
- choices=["Clarity", "Completeness", "Accuracy", "Relevance", "User-Friendliness", "Depth", "Creativity"]
89
  )
90
 
91
  # Button and outputs
92
  evaluate_button = gr.Button("Evaluate Models")
 
 
 
 
 
 
 
 
 
 
 
 
93
  evaluation_output = gr.Textbox(
94
  label="Evaluation Results",
95
- placeholder="The evaluation results will appear here...",
96
  lines=20,
97
  interactive=False
98
  )
@@ -101,7 +108,7 @@ with gr.Blocks(title="LLM as a Judge") as demo:
101
  evaluate_button.click(
102
  fn=evaluate_responses,
103
  inputs=[prompt_input, repo_a_input, model_a_input, repo_b_input, model_b_input, criteria_dropdown],
104
- outputs=[evaluation_output]
105
  )
106
 
107
  # Launch app
 
11
 
12
  # Generate a response using the specified model and prompt
13
  def generate_response(model, prompt):
14
+ response = model(prompt, max_tokens=512, temperature=0.5, top_p=0.95)
15
  return response["choices"][0]["text"]
16
 
17
  # Evaluate responses using the LoRA evaluation model
18
  def evaluate_responses(prompt, repo_a, model_a, repo_b, model_b, evaluation_criteria):
19
  if len(evaluation_criteria) > 3:
20
+ return "Error: Please select up to 3 evaluation criteria only.", "", ""
21
 
22
  # Load models
23
  model_a_instance = load_user_model(repo_a, model_a)
 
47
  evaluation_response = lora_model.create_completion(
48
  prompt=evaluation_prompt,
49
  max_tokens=512,
50
+ temperature=0.5,
51
+ top_p=0.95,
52
  )
53
  evaluation_results = evaluation_response["choices"][0]["text"]
54
 
55
+ return response_a, response_b, evaluation_results
 
 
 
 
 
 
56
 
57
  # Load the LoRA evaluation model
58
  def load_lora_model():
 
68
 
69
  # Gradio interface
70
  with gr.Blocks(title="LLM as a Judge") as demo:
71
+ gr.Markdown("## LLM as a Judge 𐄷")
72
 
73
  # Model inputs
74
+ repo_a_input = gr.Textbox(label="Model A Repository", placeholder="KolumbusLindh/LoRA-6150")
75
+ model_a_input = gr.Textbox(label="Model A File Name", placeholder="unsloth.F16.gguf")
76
+ repo_b_input = gr.Textbox(label="Model B Repository", placeholder="forestav/LoRA-2000")
77
+ model_b_input = gr.Textbox(label="Model B File Name", placeholder="unsloth.F16.gguf")
78
 
79
  # Prompt and criteria inputs
80
  prompt_input = gr.Textbox(label="Enter Prompt", placeholder="Enter the prompt here...", lines=3)
81
  criteria_dropdown = gr.CheckboxGroup(
82
+ label="Select Evaluation Criteria (Max 3)",
83
+ choices=["Clarity", "Completeness", "Accuracy"] # Restricted criteria
84
  )
85
 
86
  # Button and outputs
87
  evaluate_button = gr.Button("Evaluate Models")
88
+ response_a_output = gr.Textbox(
89
+ label="Response A",
90
+ placeholder="Response from Model A will appear here...",
91
+ lines=10,
92
+ interactive=False
93
+ )
94
+ response_b_output = gr.Textbox(
95
+ label="Response B",
96
+ placeholder="Response from Model B will appear here...",
97
+ lines=10,
98
+ interactive=False
99
+ )
100
  evaluation_output = gr.Textbox(
101
  label="Evaluation Results",
102
+ placeholder="The evaluation analysis will appear here...",
103
  lines=20,
104
  interactive=False
105
  )
 
108
  evaluate_button.click(
109
  fn=evaluate_responses,
110
  inputs=[prompt_input, repo_a_input, model_a_input, repo_b_input, model_b_input, criteria_dropdown],
111
+ outputs=[response_a_output, response_b_output, evaluation_output]
112
  )
113
 
114
  # Launch app