Kolumbus Lindh committed on
Commit
8f23865
·
1 Parent(s): 66cb564
Files changed (1) hide show
  1. app.py +37 -24
app.py CHANGED
@@ -2,36 +2,27 @@ import gradio as gr
2
  from llama_cpp import Llama
3
  from huggingface_hub import hf_hub_download
4
 
5
- # Load the base LoRA evaluation model
6
- def load_lora_model():
7
- repo_id = "KolumbusLindh/LoRA-4100"
8
- model_file = "unsloth.F16.gguf"
9
  local_path = hf_hub_download(repo_id=repo_id, filename=model_file)
10
- print(f"Loading LoRA model from: {local_path}")
11
  return Llama(model_path=local_path, n_ctx=2048, n_threads=8)
12
 
13
- lora_model = load_lora_model()
14
- print("LoRA model loaded successfully!")
15
-
16
- # Function to load a user-specified model
17
- def load_user_model(model_path):
18
- print(f"Loading user model from: {model_path}")
19
- return Llama(model_path=model_path, n_ctx=2048, n_threads=8)
20
-
21
  # Generate a response using the specified model and prompt
22
  def generate_response(model, prompt):
23
  response = model(prompt, max_tokens=256, temperature=0.7)
24
  return response["choices"][0]["text"]
25
 
26
- # Evaluate responses generated by two models using the LoRA model
27
- def evaluate_responses(prompt, model_a_path, model_b_path, evaluation_criteria):
28
  # Load user-specified models
29
- model_a = load_user_model(model_a_path)
30
- model_b = load_user_model(model_b_path)
31
 
32
  # Generate responses
33
- response_a = generate_response(model_a, prompt)
34
- response_b = generate_response(model_b, prompt)
35
 
36
  print(f"Response A: {response_a}")
37
  print(f"Response B: {response_b}")
@@ -55,13 +46,31 @@ Please evaluate the responses based on the criteria above. Rate each response on
55
  )
56
  return evaluation_response["choices"][0]["text"]
57
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  # Gradio interface
59
  with gr.Blocks(title="LLM as a Judge") as demo:
60
  gr.Markdown("## LLM as a Judge 🧐")
61
 
62
- # User inputs for models, prompt, and evaluation criteria
63
- model_a_input = gr.Textbox(label="Model A Path or URL", placeholder="Enter the path or URL for Model A...")
64
- model_b_input = gr.Textbox(label="Model B Path or URL", placeholder="Enter the path or URL for Model B...")
 
 
 
 
 
 
65
  prompt_input = gr.Textbox(label="Enter Prompt", placeholder="Enter the prompt here...", lines=3)
66
  criteria_dropdown = gr.Dropdown(
67
  label="Select Evaluation Criteria",
@@ -69,7 +78,11 @@ with gr.Blocks(title="LLM as a Judge") as demo:
69
  value="Clarity",
70
  type="value"
71
  )
 
 
72
  evaluate_button = gr.Button("Evaluate Models")
 
 
73
  evaluation_output = gr.Textbox(
74
  label="Evaluation Results",
75
  placeholder="The evaluation results will appear here...",
@@ -80,10 +93,10 @@ with gr.Blocks(title="LLM as a Judge") as demo:
80
  # Link the evaluation function to the button
81
  evaluate_button.click(
82
  fn=evaluate_responses,
83
- inputs=[prompt_input, model_a_input, model_b_input, criteria_dropdown],
84
  outputs=[evaluation_output]
85
  )
86
 
87
  # Launch the Gradio app
88
  if __name__ == "__main__":
89
- demo.launch()
 
2
  from llama_cpp import Llama
3
  from huggingface_hub import hf_hub_download
4
 
5
# Function to load a user-specified model from Hugging Face
def load_user_model(repo_id, model_file):
    """Download ``model_file`` from the Hub repository ``repo_id`` and return a Llama handle.

    The file is fetched (or served from the local HF cache) via hf_hub_download,
    then wrapped in a llama.cpp model with a 2048-token context and 8 threads.
    """
    print(f"Downloading model {model_file} from repository {repo_id}...")
    downloaded_path = hf_hub_download(repo_id=repo_id, filename=model_file)
    print(f"Model downloaded to: {downloaded_path}")
    return Llama(model_path=downloaded_path, n_ctx=2048, n_threads=8)
11
 
 
 
 
 
 
 
 
 
12
# Generate a response using the specified model and prompt
def generate_response(model, prompt):
    """Run ``prompt`` through ``model`` and return the generated completion text."""
    completion = model(prompt, max_tokens=256, temperature=0.7)
    first_choice = completion["choices"][0]
    return first_choice["text"]
16
 
17
+ # Evaluate responses generated by two models using the LoRA evaluation model
18
+ def evaluate_responses(prompt, repo_a, model_a, repo_b, model_b, evaluation_criteria):
19
  # Load user-specified models
20
+ model_a_instance = load_user_model(repo_a, model_a)
21
+ model_b_instance = load_user_model(repo_b, model_b)
22
 
23
  # Generate responses
24
+ response_a = generate_response(model_a_instance, prompt)
25
+ response_b = generate_response(model_b_instance, prompt)
26
 
27
  print(f"Response A: {response_a}")
28
  print(f"Response B: {response_b}")
 
46
  )
47
  return evaluation_response["choices"][0]["text"]
48
 
49
# Load the base LoRA evaluation model
def load_lora_model():
    """Download the fixed LoRA judge model from the Hub and return a Llama handle.

    The repo and filename are hard-coded: this is the single evaluation model
    the app uses to judge the two user-supplied models' responses.
    """
    judge_repo = "KolumbusLindh/LoRA-4100"
    judge_file = "unsloth.F16.gguf"
    print(f"Downloading LoRA evaluation model from repository {judge_repo}...")
    judge_path = hf_hub_download(repo_id=judge_repo, filename=judge_file)
    print(f"LoRA evaluation model downloaded to: {judge_path}")
    return Llama(model_path=judge_path, n_ctx=2048, n_threads=8)

# Loaded once at import time so every evaluation reuses the same instance.
lora_model = load_lora_model()
print("LoRA evaluation model loaded successfully!")
60
+
61
  # Gradio interface
62
  with gr.Blocks(title="LLM as a Judge") as demo:
63
  gr.Markdown("## LLM as a Judge 🧐")
64
 
65
+ # Inputs for Model A repository and file
66
+ repo_a_input = gr.Textbox(label="Model A Repository (e.g., KolumbusLindh/LoRA-4100)", placeholder="Enter the Hugging Face repo name for Model A...")
67
+ model_a_input = gr.Textbox(label="Model A File Name (e.g., unsloth.F16.gguf)", placeholder="Enter the model filename for Model A...")
68
+
69
+ # Inputs for Model B repository and file
70
+ repo_b_input = gr.Textbox(label="Model B Repository (e.g., KolumbusLindh/LoRA-4100)", placeholder="Enter the Hugging Face repo name for Model B...")
71
+ model_b_input = gr.Textbox(label="Model B File Name (e.g., unsloth.F16.gguf)", placeholder="Enter the model filename for Model B...")
72
+
73
+ # Input for prompt and evaluation criteria
74
  prompt_input = gr.Textbox(label="Enter Prompt", placeholder="Enter the prompt here...", lines=3)
75
  criteria_dropdown = gr.Dropdown(
76
  label="Select Evaluation Criteria",
 
78
  value="Clarity",
79
  type="value"
80
  )
81
+
82
+ # Button to evaluate responses
83
  evaluate_button = gr.Button("Evaluate Models")
84
+
85
+ # Output for evaluation results
86
  evaluation_output = gr.Textbox(
87
  label="Evaluation Results",
88
  placeholder="The evaluation results will appear here...",
 
93
  # Link the evaluation function to the button
94
  evaluate_button.click(
95
  fn=evaluate_responses,
96
+ inputs=[prompt_input, repo_a_input, model_a_input, repo_b_input, model_b_input, criteria_dropdown],
97
  outputs=[evaluation_output]
98
  )
99
 
100
  # Launch the Gradio app
101
  if __name__ == "__main__":
102
+ demo.launch() # Add share=True to create a public link