yashphalle committed
Commit 73e70b2 · verified · 1 Parent(s): 5554a1e

Create app.py

Files changed (1)
  1. app.py +152 -0
app.py ADDED
@@ -0,0 +1,152 @@
+ import os
+ import re
+
+ import gradio as gr
+ import requests
+
+ # -----------------------------
+ # 1. Configure the open-source LLM API endpoint
+ # For demonstration, we use the hosted Inference API on Hugging Face,
+ # which is free to use up to a rate limit.
+ # -----------------------------
+ # Example: an OpenAssistant model endpoint on HF. Many models on the
+ # Hugging Face Hub expose a free Inference API endpoint like this one.
+
+ API_URL = "https://api-inference.huggingface.co/models/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5"
+
+ # Read the token from the environment (e.g. a Space secret). If the model
+ # does not require authentication, the header is simply omitted.
+ HF_API_TOKEN = os.environ.get("HF_API_TOKEN", "")
+ headers = {"Authorization": f"Bearer {HF_API_TOKEN}"} if HF_API_TOKEN else {}
+
+ # -----------------------------
+ # 2. Define a function to query the model
+ # -----------------------------
+ def query_model(prompt: str) -> str:
+     """
+     Sends the prompt to the Hugging Face Inference API and returns the model's response text.
+     """
+     # The payload format for text generation can vary by model; this is the
+     # generic text-generation format used by the hosted Inference API.
+     payload = {
+         "inputs": prompt,
+         "parameters": {
+             "max_new_tokens": 200,  # limit response length
+             "temperature": 0.7,     # moderate creativity
+         },
+     }
+     response = requests.post(API_URL, headers=headers, json=payload, timeout=120)
+     if response.status_code == 200:
+         model_output = response.json()
+         # Text-generation endpoints usually return a list of dicts such as
+         # [{"generated_text": "..."}], but some return a single dict.
+         if isinstance(model_output, dict) and "generated_text" in model_output:
+             return model_output["generated_text"]
+         elif isinstance(model_output, list) and len(model_output) > 0:
+             return model_output[0].get("generated_text", "")
+         else:
+             return "Error: Unexpected model output format."
+     else:
+         return f"Error {response.status_code}: {response.text}"
+
+ # -----------------------------
+ # 3. Define a simple evaluation function
+ # This is a naive "keyword and structure" based scoring for demonstration.
+ # -----------------------------
+ def evaluate_response(response: str) -> dict:
+     """
+     Rates the response on a scale of 1–5 for:
+     1) Relevance (R)
+     2) Depth (D)
+     3) Clarity (C)
+     4) References (E)
+     5) Overall Quality (Q)
+     Returns a dict with the individual scores and their total.
+     """
+     # Relevance: does the text mention both 'remote work' and 'software engineer(s)'?
+     relevance = 5 if ("remote work" in response.lower() and "software engineer" in response.lower()) else 3
+
+     # Depth: longer answers score higher (>150 words: 5, >80 words: 4, otherwise 3)
+     word_count = len(response.split())
+     depth = 5 if word_count > 150 else (4 if word_count > 80 else 3)
+
+     # Clarity: reward answers that are split into at least two paragraphs
+     paragraphs = response.strip().split("\n\n")
+     clarity = 5 if len(paragraphs) >= 2 else 3
+
+     # References: look for 'reference', 'source', 'citation', or a URL
+     if re.search(r"reference|source|citation|http", response, re.IGNORECASE):
+         references = 5
+     else:
+         references = 2
+
+     # Overall Quality: a naive combination; penalize responses that appear
+     # to be cut off (ending in "...")
+     if "..." in response[-10:]:
+         overall = 3
+     else:
+         overall = 5 if (relevance >= 4 and depth >= 4 and references >= 4) else 4
+
+     total_score = relevance + depth + clarity + references + overall
+
+     return {
+         "Relevance": relevance,
+         "Depth": depth,
+         "Clarity": clarity,
+         "References": references,
+         "Overall": overall,
+         "Total": total_score,
+     }
+
+ # -----------------------------
+ # 4. Define the Gradio interface function
+ # This is the function that runs when the user clicks "Generate & Evaluate"
+ # -----------------------------
+ def generate_and_evaluate(prompt: str):
+     if not prompt.strip():
+         return "Please enter a prompt.", {}
+
+     # 1) Get LLM response
+     llm_response = query_model(prompt)
+
+     # 2) Evaluate
+     scores = evaluate_response(llm_response)
+
+     return llm_response, scores
+
+ # -----------------------------
+ # 5. Build the Gradio UI
+ # -----------------------------
+ with gr.Blocks() as demo:
+     gr.Markdown("# Remote Work Benefits Generator & Evaluator")
+     gr.Markdown(
+         "Enter a prompt about the key benefits of remote work for software engineers. "
+         "The model will generate a response and our auto-evaluator will score it."
+     )
+
+     prompt_input = gr.Textbox(
+         label="Enter your prompt here",
+         placeholder="E.g., 'Write a short report on the benefits of remote work for software engineers...'",
+         lines=3
+     )
+
+     generate_button = gr.Button("Generate & Evaluate")
+
+     response_output = gr.Textbox(
+         label="LLM Response",
+         lines=10
+     )
+
+     score_output = gr.JSON(
+         label="Evaluation Scores",
+         visible=True
+     )
+
+     generate_button.click(
+         fn=generate_and_evaluate,
+         inputs=[prompt_input],
+         outputs=[response_output, score_output]
+     )
+
+ # -----------------------------
+ # 6. Launch
+ # -----------------------------
+ if __name__ == "__main__":
+     demo.launch()
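
Because the scoring in evaluate_response is deterministic, it can be sanity-checked locally without calling the Inference API. The snippet below is a minimal sketch, not part of the committed file: it assumes app.py is importable from the working directory (importing it builds the Blocks UI but does not launch it, since launch() is guarded by __main__), and the sample text is made up purely to exercise the heuristics.

# sanity_check.py -- illustrative only, not part of app.py
from app import evaluate_response

# A made-up two-paragraph answer that mentions the expected keywords and a URL.
sample = (
    "Remote work lets software engineers focus deeply.\n\n"
    "See https://example.com for sources."
)

print(evaluate_response(sample))
# With the heuristics above this prints:
# {'Relevance': 5, 'Depth': 3, 'Clarity': 5, 'References': 5, 'Overall': 4, 'Total': 22}

Short answers like this one cap Depth at 3, which is why Overall lands at 4 rather than 5 under the current rules.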
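
A practical caveat with the free hosted Inference API: a model that has not been queried recently is loaded on demand, and until it is warm the endpoint tends to answer with HTTP 503 rather than a completion. The sketch below is one possible hardening of the call, not part of this commit; it reuses the API_URL and headers defined in app.py and assumes the Inference API's "options" payload (wait_for_model) plus a small retry loop.

import time

import requests

def query_model_with_retry(prompt: str, retries: int = 3) -> str:
    """Like query_model, but tolerates a cold model instead of failing immediately."""
    payload = {
        "inputs": prompt,
        "parameters": {"max_new_tokens": 200, "temperature": 0.7},
        "options": {"wait_for_model": True},  # ask the API to hold the request until the model is loaded
    }
    for attempt in range(retries):
        # API_URL and headers are the module-level values defined in app.py above.
        response = requests.post(API_URL, headers=headers, json=payload, timeout=300)
        if response.status_code == 200:
            output = response.json()
            if isinstance(output, list) and output:
                return output[0].get("generated_text", "")
            return str(output)
        if response.status_code == 503 and attempt < retries - 1:
            time.sleep(5)  # model still loading; back off briefly and try again
            continue
        return f"Error {response.status_code}: {response.text}"
    return "Error: no response after retries."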