Spaces:
Sleeping
Sleeping
curation app
Browse files- Dockerfile +15 -0
- README.md +1 -1
- app.py +229 -0
- config.ini +5 -0
- requirements.txt +8 -0
Dockerfile
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
FROM python:3.10

WORKDIR /code

COPY --link --chown=1000 . .

# Writable cache directories for the non-root user a Space runs as.
RUN mkdir -p /tmp/cache/
RUN mkdir -p /.cache
RUN chmod a+rwx -R /tmp/cache/
RUN chmod a+rwx -R /.cache
# Point the Hugging Face hub cache at the writable directory created above.
# (The original line set HF_HUB_CACHE to the literal string "HF_HOME",
# which is a relative, non-existent path.)
ENV HF_HUB_CACHE=/tmp/cache/
RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

ENV PYTHONUNBUFFERED=1 PORT=7860
CMD ["python", "app.py"]
README.md
CHANGED
@@ -8,4 +8,4 @@ pinned: false
|
|
8 |
license: apache-2.0
|
9 |
---
|
10 |
|
11 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
8 |
license: apache-2.0
|
9 |
---
|
10 |
|
11 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
ADDED
@@ -0,0 +1,229 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import json
import random
from collections import Counter

import dotenv
from datasets import load_dataset
from fasthtml.common import *
from fasthtml_hf import setup_hf_backup
from fastlite import database

# Load environment variables (e.g. an HF token for backups) from a .env file.
dotenv.load_dotenv()

# Pull the scored dataset and tag each row with a stable positional index so
# curation decisions can be joined back to the source example later.
fact_dataset = load_dataset("griffin/iclr2025_data_scores", split="train").to_list()
fact_dataset = [{"example_id": i, **example} for i, example in enumerate(fact_dataset)]

# SQLite store of curation decisions; the table is created on first run.
db = database("data/examples.db")
examples = db.t.examples
if examples not in db.t:
    examples.create(
        id=int,
        example_id=int,
        question_type=str,
        question=str,
        answer=str,
        decision=str,  # "good" or "bad" (see the /evaluate buttons)
        pk="id",
    )

# In-memory mirror of which dataset rows have already been curated, kept in
# sync with the DB by the /evaluate handler.
evaluated_ids = set(row["example_id"] for row in examples.rows)
question_types = sorted(set(ex["question_type"] for ex in fact_dataset))
|
33 |
+
def get_stats():
    """Return per-question-type curation progress.

    Returns:
        dict mapping each question type to ``{"total": int, "curated": int}``,
        where "curated" counts rows already decided (good or bad).
    """
    totals = Counter(ex["question_type"] for ex in fact_dataset)
    curated = Counter(row["question_type"] for row in examples.rows)
    return {
        qt: {"total": totals[qt], "curated": curated[qt]}
        for qt in question_types
    }
41 |
+
|
42 |
+
|
43 |
+
def get_example(selected_type=None):
    """Pick a random not-yet-curated example, optionally filtered by type.

    Args:
        selected_type: question type to restrict the pool to; falsy means
            any type.

    Returns:
        The chosen example trimmed to the display-relevant keys, or ``None``
        when every matching example has already been evaluated.
    """
    display_keys = (
        "example_id",
        "question_type",
        "question",
        "rationale",
        "answer",
        "log_ll",
        "oracle_log_ll",
        "oracle_advantage",
        "prediction",
        "prediction_oracle",
        "accuracy",
        "accuracy_oracle",
        "accuracy_status",
    )

    pool = [ex for ex in fact_dataset if ex["example_id"] not in evaluated_ids]
    if selected_type:
        pool = [ex for ex in pool if ex["question_type"] == selected_type]
    if not pool:
        return None

    chosen = random.choice(pool)
    # Keys missing from a given row are simply omitted.
    return {key: chosen[key] for key in display_keys if key in chosen}
74 |
+
|
75 |
+
|
76 |
+
# app
# Dark-theme CSS injected into every page's <head> via fast_app(hdrs=...).
style = Style("""
body { background-color: #1e1e1e; color: #d4d4d4; font-family: Arial, sans-serif; }
h1, h2, h3 { color: #61dafb; }
.example-container { margin-top: 20px; }
.example-table { border-collapse: collapse; width: 100%; }
.example-table th, .example-table td { border: 1px solid #3a3a3a; padding: 8px; text-align: left; }
.example-table th { background-color: #2a2a2a; color: #61dafb; }
.example-table td { color: #d4d4d4; }
#evaluation-form { margin-top: 20px; }
#evaluation-form button { margin-right: 10px; background-color: #0e639c; color: white; border: none; padding: 10px 20px; cursor: pointer; }
#evaluation-form button:hover { background-color: #1177bb; }
select { background-color: #2a2a2a; color: #d4d4d4; border: 1px solid #3a3a3a; padding: 5px; }
a { color: #61dafb; text-decoration: none; }
a:hover { text-decoration: underline; }
""")

# live=True enables auto-reload during development.
app, rt = fast_app(live=True, hdrs=(style,))
94 |
+
|
95 |
+
|
96 |
+
def render_stats(stats):
    """Render per-question-type curation progress as an HTML table.

    Args:
        stats: mapping of question type -> {"total": int, "curated": int},
            as produced by ``get_stats`` (already in sorted-type order).

    Returns:
        A fasthtml ``Table`` with one row per question type.
    """

    def _curated_cell(entry):
        # Guard the percentage against a type with zero examples so the
        # page never raises ZeroDivisionError.
        total = entry["total"]
        fraction = entry["curated"] / total if total else 0.0
        return f"{entry['curated']} ({fraction:.1%})"

    # Iterate the stats mapping itself (insertion order == sorted types)
    # instead of re-reading the module-level question_types global.
    return Table(
        Tr(Th("Question Type"), Th("Curated"), Th("Total")),
        *[
            Tr(Td(qt), Td(_curated_cell(entry)), Td(entry["total"]))
            for qt, entry in stats.items()
        ],
        cls="stats-table",
    )
111 |
+
|
112 |
+
|
113 |
+
def render_example(example):
    """Render one example as a field/value table inside #example-details."""
    rows = [Tr(Th(field), Td(str(value))) for field, value in example.items()]
    return Div(
        H3("Example Details"),
        Table(*rows, cls="example-table"),
        id="example-details",
    )
122 |
+
|
123 |
+
|
124 |
+
@rt("/")
|
125 |
+
async def get(question_type: str = None):
|
126 |
+
stats = get_stats()
|
127 |
+
example = get_example(question_type)
|
128 |
+
|
129 |
+
dropdown = Select(
|
130 |
+
Option("Question Types", value="", selected=question_type is None),
|
131 |
+
*[Option(qt, value=qt, selected=qt == question_type) for qt in question_types],
|
132 |
+
name="question_type",
|
133 |
+
hx_get="/",
|
134 |
+
hx_target="body",
|
135 |
+
hx_push_url="true",
|
136 |
+
)
|
137 |
+
|
138 |
+
if example is None:
|
139 |
+
content = Div(
|
140 |
+
H2("All examples of this type have been evaluated!"), render_stats(stats)
|
141 |
+
)
|
142 |
+
else:
|
143 |
+
content = Div(
|
144 |
+
H2("Example"),
|
145 |
+
Div(
|
146 |
+
render_example(example),
|
147 |
+
Form(
|
148 |
+
Button(
|
149 |
+
"Good Example",
|
150 |
+
name="decision",
|
151 |
+
value="good",
|
152 |
+
hx_post="/evaluate",
|
153 |
+
hx_target="#example-details",
|
154 |
+
),
|
155 |
+
Button(
|
156 |
+
"Bad Example",
|
157 |
+
name="decision",
|
158 |
+
value="bad",
|
159 |
+
hx_post="/evaluate",
|
160 |
+
hx_target="#example-details",
|
161 |
+
),
|
162 |
+
Hidden(name="example", value=json.dumps(example)),
|
163 |
+
id="evaluation-form",
|
164 |
+
),
|
165 |
+
id="example-container",
|
166 |
+
),
|
167 |
+
)
|
168 |
+
|
169 |
+
view_stats_link = A("Curation Stats", href="/stats", cls="view-stats-link")
|
170 |
+
|
171 |
+
return Titled(
|
172 |
+
"Example Curation",
|
173 |
+
H2("Question Type Selection"),
|
174 |
+
dropdown,
|
175 |
+
content,
|
176 |
+
view_stats_link,
|
177 |
+
)
|
178 |
+
|
179 |
+
|
180 |
+
@rt("/stats")
|
181 |
+
async def get():
|
182 |
+
stats = get_stats()
|
183 |
+
stats = render_stats(stats)
|
184 |
+
|
185 |
+
return Titled(
|
186 |
+
"Curation Statistics",
|
187 |
+
Div(
|
188 |
+
stats,
|
189 |
+
A("Back to Curation", href="/", cls="back-link"),
|
190 |
+
cls="container",
|
191 |
+
),
|
192 |
+
)
|
193 |
+
|
194 |
+
|
195 |
+
@rt("/evaluate")
|
196 |
+
async def post(decision: str, example: str):
|
197 |
+
example_dict = json.loads(example)
|
198 |
+
|
199 |
+
# Insert the evaluated example into the database
|
200 |
+
examples.insert(
|
201 |
+
{
|
202 |
+
"id": len(list(examples.rows)) + 1, # Auto-increment ID
|
203 |
+
"example_id": example_dict["example_id"],
|
204 |
+
"question_type": example_dict["question_type"],
|
205 |
+
"question": example_dict["question"],
|
206 |
+
"answer": example_dict["answer"],
|
207 |
+
"decision": decision,
|
208 |
+
}
|
209 |
+
)
|
210 |
+
|
211 |
+
# Add the evaluated example's ID to the set of evaluated IDs
|
212 |
+
evaluated_ids.add(example_dict["example_id"])
|
213 |
+
# Get a new example
|
214 |
+
new_example = get_example(example_dict["question_type"])
|
215 |
+
|
216 |
+
if new_example is None:
|
217 |
+
return Div(H2("All examples of this type have been evaluated!"))
|
218 |
+
else:
|
219 |
+
return render_example(new_example)
|
220 |
+
|
221 |
+
|
222 |
+
# serve()
if __name__ == "__main__":
    import os

    import uvicorn

    # Enable periodic backups of the local DB per the config.ini settings.
    setup_hf_backup(app)
    port = int(os.environ.get("PORT", 7860))
    uvicorn.run(app, host="0.0.0.0", port=port)
config.ini
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[DEFAULT]
|
2 |
+
dataset_id = single-fact-curation-iclr-eval
|
3 |
+
db_dir = data
|
4 |
+
private_backup = True
|
5 |
+
# number of minutes between periodic backups
interval = 2
|
requirements.txt
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
fasthtml-hf>=0.1.1
|
2 |
+
python-fasthtml>=0.0.8
|
3 |
+
huggingface-hub>=0.20.0
|
4 |
+
uvicorn>=0.29
|
5 |
+
pandas
|
6 |
+
numpy
|
7 |
+
python-dotenv
|
8 |
+
datasets
|