rbiswasfc committed on
Commit
aff259f
·
1 Parent(s): a0c9e19

curation app

Browse files
Files changed (5) hide show
  1. Dockerfile +15 -0
  2. README.md +1 -1
  3. app.py +229 -0
  4. config.ini +5 -0
  5. requirements.txt +8 -0
Dockerfile ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.10
2
+
3
+ WORKDIR /code
4
+
5
+ COPY --link --chown=1000 . .
6
+
7
+ RUN mkdir -p /tmp/cache/
8
+ RUN mkdir -p /.cache
9
+ RUN chmod a+rwx -R /tmp/cache/
10
+ RUN chmod a+rwx -R /.cache
11
+ ENV HF_HUB_CACHE=HF_HOME
12
+ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
13
+
14
+ ENV PYTHONUNBUFFERED=1 PORT=7860
15
+ CMD ["python", "app.py"]
README.md CHANGED
@@ -8,4 +8,4 @@ pinned: false
8
  license: apache-2.0
9
  ---
10
 
11
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
8
  license: apache-2.0
9
  ---
10
 
11
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,229 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import random
3
+ from collections import Counter
4
+
5
+ import dotenv
6
+ from datasets import load_dataset
7
+ from fasthtml.common import *
8
+ from fasthtml_hf import setup_hf_backup
9
+ from fastlite import database
10
+
11
+ dotenv.load_dotenv()
12
+
13
# Load the scored dataset once at import time and tag every row with its
# positional index so it can be referenced later as ``example_id``.
fact_dataset = [
    {"example_id": idx, **row}
    for idx, row in enumerate(
        load_dataset("griffin/iclr2025_data_scores", split="train").to_list()
    )
]

# SQLite table that stores one row per curated (evaluated) example.
db = database("data/examples.db")
examples = db.t.examples
if examples not in db.t:
    _example_columns = {
        "id": int,
        "example_id": int,
        "question_type": str,
        "question": str,
        "answer": str,
        "decision": str,
    }
    examples.create(**_example_columns, pk="id")

# In-memory view of what has already been evaluated, seeded from the table.
evaluated_ids = {row["example_id"] for row in examples.rows}
# Distinct question types, sorted for a stable dropdown / stats order.
question_types = sorted({item["question_type"] for item in fact_dataset})
31
+
32
+
33
def get_stats():
    """Summarise curation progress for every question type.

    Returns:
        A dict keyed by each entry of ``question_types``; every value is
        ``{"total": n, "curated": m}`` where ``n`` counts matching items in
        ``fact_dataset`` and ``m`` counts matching rows in the ``examples``
        table.
    """
    totals_by_type = Counter(item["question_type"] for item in fact_dataset)
    curated_by_type = Counter(record["question_type"] for record in examples.rows)

    summary = {}
    for q_type in question_types:
        # Counter returns 0 for unseen keys, so types with no curated rows
        # need no special handling.
        summary[q_type] = {
            "total": totals_by_type[q_type],
            "curated": curated_by_type[q_type],
        }
    return summary
41
+
42
+
43
def get_example(selected_type=None):
    """Pick a random example that has not been evaluated yet.

    Args:
        selected_type: When truthy, only examples whose ``question_type``
            equals this value are considered; otherwise every type is.

    Returns:
        A dict holding the display fields present on the chosen example, or
        ``None`` when no unevaluated example matches.
    """

    def _is_candidate(item):
        if item["example_id"] in evaluated_ids:
            return False
        return not selected_type or item["question_type"] == selected_type

    candidates = [item for item in fact_dataset if _is_candidate(item)]
    if not candidates:
        return None

    chosen = random.choice(candidates)

    # Only these fields are exposed; any that the row lacks are skipped.
    displayed_fields = (
        "example_id",
        "question_type",
        "question",
        "rationale",
        "answer",
        "log_ll",
        "oracle_log_ll",
        "oracle_advantage",
        "prediction",
        "prediction_oracle",
        "accuracy",
        "accuracy_oracle",
        "accuracy_status",
    )
    return {field: chosen[field] for field in displayed_fields if field in chosen}
74
+
75
+
76
# app
# Dark-theme stylesheet injected into every page via the app headers.
_PAGE_CSS = """
body { background-color: #1e1e1e; color: #d4d4d4; font-family: Arial, sans-serif; }
h1, h2, h3 { color: #61dafb; }
.example-container { margin-top: 20px; }
.example-table { border-collapse: collapse; width: 100%; }
.example-table th, .example-table td { border: 1px solid #3a3a3a; padding: 8px; text-align: left; }
.example-table th { background-color: #2a2a2a; color: #61dafb; }
.example-table td { color: #d4d4d4; }
#evaluation-form { margin-top: 20px; }
#evaluation-form button { margin-right: 10px; background-color: #0e639c; color: white; border: none; padding: 10px 20px; cursor: pointer; }
#evaluation-form button:hover { background-color: #1177bb; }
select { background-color: #2a2a2a; color: #d4d4d4; border: 1px solid #3a3a3a; padding: 5px; }
a { color: #61dafb; text-decoration: none; }
a:hover { text-decoration: underline; }
"""
style = Style(_PAGE_CSS)

app, rt = fast_app(hdrs=(style,), live=True)
94
+
95
+
96
def render_stats(stats):
    """Build the HTML table showing curated vs. total counts per question type.

    Args:
        stats: Mapping of question type to ``{"total": int, "curated": int}``,
            as produced by ``get_stats``.

    Returns:
        A ``Table`` with one header row and one row per entry of
        ``question_types``.
    """

    def _stats_row(q_type):
        curated = stats[q_type]["curated"]
        total = stats[q_type]["total"]
        # The curated cell shows the raw count plus its share of the total.
        return Tr(
            Td(q_type),
            Td(f"{curated} ({curated/total:.1%})"),
            Td(total),
        )

    header = Tr(Th("Question Type"), Th("Curated"), Th("Total"))
    body_rows = [_stats_row(q_type) for q_type in question_types]
    return Table(header, *body_rows, cls="stats-table")
111
+
112
+
113
def render_example(example):
    """Render an example dict as a two-column field/value table.

    Args:
        example: Mapping of field name to value; values are stringified.

    Returns:
        A ``Div`` with id ``example-details`` wrapping a heading and the table.
    """
    field_rows = []
    for field, value in example.items():
        field_rows.append(Tr(Th(field), Td(str(value))))

    details_table = Table(*field_rows, cls="example-table")
    return Div(H3("Example Details"), details_table, id="example-details")
122
+
123
+
124
def _evaluation_panel(example):
    """Render one example together with its Good/Bad form as a single unit.

    The whole panel (details + form) is what the buttons swap out, so the
    hidden ``example`` payload is always replaced together with the example
    that is displayed. Swapping only the details table would leave the form
    submitting the previously shown example.
    """

    def _decision_button(label, value):
        return Button(
            label,
            name="decision",
            value=value,
            hx_post="/evaluate",
            hx_target="#example-container",
            # outerHTML replaces the panel itself instead of nesting a new
            # panel (with a duplicate id) inside the old one.
            hx_swap="outerHTML",
        )

    return Div(
        render_example(example),
        Form(
            _decision_button("Good Example", "good"),
            _decision_button("Bad Example", "bad"),
            Hidden(name="example", value=json.dumps(example)),
            id="evaluation-form",
        ),
        id="example-container",
    )


@rt("/")
async def get(question_type: str = None):
    """Main curation page: type selector, one example to judge, stats link.

    Args:
        question_type: Optional filter; empty or missing means any type.
    """
    stats = get_stats()
    example = get_example(question_type)

    dropdown = Select(
        # An empty string (placeholder re-selected) is treated like "no filter".
        Option("Question Types", value="", selected=not question_type),
        *[Option(qt, value=qt, selected=qt == question_type) for qt in question_types],
        name="question_type",
        hx_get="/",
        hx_target="body",
        hx_push_url="true",
    )

    if example is None:
        content = Div(
            H2("All examples of this type have been evaluated!"), render_stats(stats)
        )
    else:
        content = Div(
            H2("Example"),
            _evaluation_panel(example),
        )

    view_stats_link = A("Curation Stats", href="/stats", cls="view-stats-link")

    return Titled(
        "Example Curation",
        H2("Question Type Selection"),
        dropdown,
        content,
        view_stats_link,
    )


@rt("/stats")
async def get():
    """Standalone page showing the per-type curation statistics table."""
    stats_table = render_stats(get_stats())

    return Titled(
        "Curation Statistics",
        Div(
            stats_table,
            A("Back to Curation", href="/", cls="back-link"),
            cls="container",
        ),
    )


@rt("/evaluate")
async def post(decision: str, example: str):
    """Record a decision for the submitted example and return the next one.

    Args:
        decision: Value of the clicked button (``"good"`` or ``"bad"``).
        example: JSON-encoded example dict from the form's hidden field.

    Returns:
        A replacement for ``#example-container``: either the next example's
        panel or a message that this question type is exhausted.
    """
    example_dict = json.loads(example)
    example_id = example_dict["example_id"]

    # Skip the write when this example was already recorded (double click,
    # second tab) so the table keeps at most one row per example.
    if example_id not in evaluated_ids:
        # "id" is the integer primary key; omitting it lets SQLite assign the
        # next value instead of counting every row and risking a collision.
        examples.insert(
            {
                "example_id": example_id,
                "question_type": example_dict["question_type"],
                "question": example_dict["question"],
                "answer": example_dict["answer"],
                "decision": decision,
            }
        )
        evaluated_ids.add(example_id)

    # Keep serving examples of the same type as the one just evaluated.
    new_example = get_example(example_dict["question_type"])

    if new_example is None:
        return Div(
            H2("All examples of this type have been evaluated!"),
            id="example-container",
        )
    return _evaluation_panel(new_example)
220
+
221
+
222
+ # serve()
223
if __name__ == "__main__":
    import os

    import uvicorn

    # fasthtml_hf's backup setup is applied to the app before serving starts.
    setup_hf_backup(app)

    # Listen on all interfaces; the port comes from $PORT, defaulting to 7860.
    listen_port = int(os.environ.get("PORT", 7860))
    uvicorn.run(app, host="0.0.0.0", port=listen_port)
config.ini ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ [DEFAULT]
2
+ dataset_id = single-fact-curation-iclr-eval
3
+ db_dir = data
4
+ private_backup = True
5
+ interval = 2 # number of minutes between periodic backups
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ fasthtml-hf>=0.1.1
2
+ python-fasthtml>=0.0.8
3
+ huggingface-hub>=0.20.0
4
+ uvicorn>=0.29
5
+ pandas
6
+ numpy
7
+ python-dotenv
8
+ datasets