XufengDuan committed
Commit ec7c10d · 1 Parent(s): c150b24

update scripts

Files changed (2)
  1. app.py +188 -245
  2. src/backend/model_operations.py +36 -10
app.py CHANGED
@@ -51,41 +51,7 @@ original_df, finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_d
51
  leaderboard_df = original_df.copy()
52
 
53
  def process_pending_evals():
54
- # if len(pending_eval_queue_df) == 0:
55
- # print("No pending evaluations found.")
56
- # return
57
- #
58
- # for _, eval_request in pending_eval_queue_df.iterrows():
59
- # import re
60
- # model_link = eval_request['model']
61
- # match = re.search(r'>([^<]+)<', model_link)
62
- # if match:
63
- # eval_request['model'] = match.group(1) # assign the extracted name to eval_request['model']
64
- # else:
65
- # eval_request['model'] = model_link # if there is no match, keep the original string
66
- #
67
- # print(f"Evaluating model: {eval_request['model']}")
68
- #
69
- # # Call the evaluation function
70
- # run_eval_suite.run_evaluation(
71
- # eval_request=eval_request,
72
- # local_dir=envs.EVAL_RESULTS_PATH_BACKEND,
73
- # results_repo=envs.RESULTS_REPO,
74
- # batch_size=1,
75
- # device=envs.DEVICE,
76
- # no_cache=True,
77
- # need_check=False, # set whether a check is required
78
- # write_results=False # set whether to write the results
79
- # )
80
- # print(f"Finished evaluation for model: {eval_request['model']}")
81
- # # Update the status to FINISHED
82
- # manage_requests.set_eval_request(
83
- # api=envs.API,
84
- # eval_request=eval_request,
85
- # new_status="FINISHED",
86
- # hf_repo=envs.QUEUE_REPO,
87
- # local_dir=envs.EVAL_REQUESTS_PATH_BACKEND
88
- # )
89
  current_pending_status = [PENDING_STATUS]
90
  print('_________________')
91
  manage_requests.check_completed_evals(
@@ -246,103 +212,88 @@ def filter_models(
246
 
247
  return filtered_df
248
 
249
 
250
- demo = gr.Blocks(css=custom_css)
251
- with demo:
252
- gr.HTML(about.TITLE)
253
- gr.Markdown(about.INTRODUCTION_TEXT, elem_classes="markdown-text")
254
-
255
- with gr.Tabs(elem_classes="tab-buttons") as tabs:
256
- with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
257
- with gr.Row():
258
- with gr.Column():
259
- with gr.Row():
260
- search_bar = gr.Textbox(
261
- placeholder=" 🔍 Search for your model (separate multiple queries with `;`) and press ENTER...",
262
- show_label=False,
263
- elem_id="search-bar",
264
- )
265
- with gr.Row():
266
- shown_columns = gr.CheckboxGroup(
267
- choices=[
268
- c.name
269
- for c in utils.fields(utils.AutoEvalColumn)
270
- if not c.hidden and not c.never_hidden and not c.dummy
271
- ],
272
- value=[
273
- c.name
274
- for c in utils.fields(utils.AutoEvalColumn)
275
- if c.displayed_by_default and not c.hidden and not c.never_hidden
276
- ],
277
- label="Select columns to show",
278
- elem_id="column-select",
279
  interactive=True,
 
280
  )
281
- with gr.Row():
282
- deleted_models_visibility = gr.Checkbox(
283
- value=False, label="Show gated/private/deleted models", interactive=True
284
  )
285
- with gr.Column(min_width=320):
286
- #with gr.Box(elem_id="box-filter"):
287
- # filter_columns_type = gr.CheckboxGroup(
288
- # label="Model types",
289
- # choices=[t.to_str() for t in utils.ModelType],
290
- # value=[t.to_str() for t in utils.ModelType],
291
- # interactive=True,
292
- # elem_id="filter-columns-type",
293
- # )
294
- filter_columns_precision = gr.CheckboxGroup(
295
- label="Precision",
296
- choices=[i.value.name for i in utils.Precision],
297
- value=[i.value.name for i in utils.Precision],
298
- interactive=True,
299
- elem_id="filter-columns-precision",
300
- )
301
- filter_columns_size = gr.CheckboxGroup(
302
- label="Model sizes (in billions of parameters)",
303
- choices=list(utils.NUMERIC_INTERVALS.keys()),
304
- value=list(utils.NUMERIC_INTERVALS.keys()),
305
- interactive=True,
306
- elem_id="filter-columns-size",
307
- )
308
 
309
- leaderboard_table = gr.components.Dataframe(
310
- value=leaderboard_df[
311
- [c.name for c in utils.fields(utils.AutoEvalColumn) if c.never_hidden]
312
- + shown_columns.value
313
- + [utils.AutoEvalColumn.dummy.name]
314
- ].sort_values(by="Overall Humanlike %", ascending=False),
315
- headers=[c.name for c in utils.fields(utils.AutoEvalColumn) if c.never_hidden] + shown_columns.value,
316
- datatype=utils.TYPES,
317
- elem_id="leaderboard-table",
318
- interactive=False,
319
- visible=True,
320
- column_widths=["33%", "33%"]
321
- )
322
 
323
- # Dummy leaderboard for handling the case when the user uses backspace key
324
- hidden_leaderboard_table_for_search = gr.components.Dataframe(
325
- value=original_df[utils.COLS],
326
- headers=utils.COLS,
327
- datatype=utils.TYPES,
328
- visible=False,
329
- )
330
- search_bar.submit(
331
- update_table,
332
- [
333
- hidden_leaderboard_table_for_search,
334
- shown_columns,
335
- #filter_columns_type,
336
- filter_columns_precision,
337
- filter_columns_size,
338
- deleted_models_visibility,
339
- search_bar,
340
- ],
341
- leaderboard_table,
342
- )
343
- # for selector in [shown_columns, filter_columns_type, filter_columns_precision, filter_columns_size, deleted_models_visibility]:
344
- for selector in [shown_columns, filter_columns_precision, filter_columns_size, deleted_models_visibility]:
345
- selector.change(
346
  update_table,
347
  [
348
  hidden_leaderboard_table_for_search,
@@ -354,133 +305,125 @@ with demo:
354
  search_bar,
355
  ],
356
  leaderboard_table,
357
- queue=True,
358
  )
359
-
360
- with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
361
- gr.Markdown(about.LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
362
-
363
- with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
364
- with gr.Column():
365
- with gr.Row():
366
- gr.Markdown(about.EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
367
-
368
- with gr.Column():
369
- with gr.Accordion(
370
- f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
371
- open=False,
372
- ):
373
- with gr.Row():
374
- finished_eval_table = gr.components.Dataframe(
375
- value=finished_eval_queue_df,
376
- headers=utils.EVAL_COLS,
377
- datatype=utils.EVAL_TYPES,
378
- row_count=5,
379
- )
380
- with gr.Accordion(
381
- f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
382
- open=False,
383
- ):
384
- with gr.Row():
385
- running_eval_table = gr.components.Dataframe(
386
- value=running_eval_queue_df,
387
- headers=utils.EVAL_COLS,
388
- datatype=utils.EVAL_TYPES,
389
- row_count=5,
390
- )
391
-
392
- with gr.Accordion(
393
- f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
394
- open=False,
395
- ):
396
- with gr.Row():
397
- pending_eval_table = gr.components.Dataframe(
398
- value=pending_eval_queue_df,
399
- headers=utils.EVAL_COLS,
400
- datatype=utils.EVAL_TYPES,
401
- row_count=5,
402
- )
403
- with gr.Row():
404
- gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
405
-
406
- with gr.Row():
407
- with gr.Column():
408
- model_name_textbox = gr.Textbox(label="Model name")
409
- revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
410
- model_type = gr.Dropdown(
411
- choices=[t.to_str(" : ") for t in utils.ModelType if t != utils.ModelType.Unknown],
412
- label="Model type",
413
- multiselect=False,
414
- value=None,
415
- interactive=True,
416
- )
417
-
418
- with gr.Column():
419
- precision = gr.Dropdown(
420
- choices=[i.value.name for i in utils.Precision if i != utils.Precision.Unknown],
421
- label="Precision",
422
- multiselect=False,
423
- value="float16",
424
- interactive=True,
425
  )
426
- weight_type = gr.Dropdown(
427
- choices=[i.value.name for i in utils.WeightType],
428
- label="Weights type",
429
- multiselect=False,
430
- value="Original",
431
- interactive=True,
432
- )
433
- base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
434
-
435
- submit_button = gr.Button("Submit Eval")
436
- submission_result = gr.Markdown()
437
- submit_button.click(
438
- submit.add_new_eval,
439
- [
440
- model_name_textbox,
441
- base_model_name_textbox,
442
- revision_name_textbox,
443
- precision,
444
- weight_type,
445
- model_type,
446
- ],
447
- submission_result,
448
- )
449
-
450
- with gr.Row():
451
- with gr.Accordion("📙 Citation", open=False):
452
- citation_button = gr.Textbox(
453
- value=about.CITATION_BUTTON_TEXT,
454
- label=about.CITATION_BUTTON_LABEL,
455
- lines=20,
456
- elem_id="citation-button",
457
- show_copy_button=True,
458
- )
459
-
460
 
461
- # Call this after initialization has completed
462
- # original_df, finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = init_space()
463
- # process_pending_evals()
464
 
465
- # try:
466
- # print(envs.EVAL_REQUESTS_PATH)
467
- # snapshot_download(
468
- # repo_id=envs.QUEUE_REPO, local_dir=envs.EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30
469
- # )
470
- # except Exception:
471
- # restart_space()
472
- # try:
473
- # print(envs.EVAL_RESULTS_PATH)
474
- # snapshot_download(
475
- # repo_id=envs.RESULTS_REPO, local_dir=envs.EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30
476
- # )
477
- # except Exception:
478
- # restart_space()
479
 
480
- # raw_data, original_df = populate.get_leaderboard_df(envs.RESULTS_REPO, envs.QUEUE_REPO, utils.COLS, utils.BENCHMARK_COLS)
481
 
482
 
483
 
484
 
485
  (
486
  finished_eval_queue_df,
 
51
  leaderboard_df = original_df.copy()
52
 
53
  def process_pending_evals():
54
+
55
  current_pending_status = [PENDING_STATUS]
56
  print('_________________')
57
  manage_requests.check_completed_evals(
 
212
 
213
  return filtered_df
214
 
215
+ try:
216
+ demo = gr.Blocks(css=custom_css)
217
+ with demo:
218
+ gr.HTML(about.TITLE)
219
+ gr.Markdown(about.INTRODUCTION_TEXT, elem_classes="markdown-text")
220
 
221
+ with gr.Tabs(elem_classes="tab-buttons") as tabs:
222
+ with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
223
+ with gr.Row():
224
+ with gr.Column():
225
+ with gr.Row():
226
+ search_bar = gr.Textbox(
227
+ placeholder=" 🔍 Search for your model (separate multiple queries with `;`) and press ENTER...",
228
+ show_label=False,
229
+ elem_id="search-bar",
230
+ )
231
+ with gr.Row():
232
+ shown_columns = gr.CheckboxGroup(
233
+ choices=[
234
+ c.name
235
+ for c in utils.fields(utils.AutoEvalColumn)
236
+ if not c.hidden and not c.never_hidden and not c.dummy
237
+ ],
238
+ value=[
239
+ c.name
240
+ for c in utils.fields(utils.AutoEvalColumn)
241
+ if c.displayed_by_default and not c.hidden and not c.never_hidden
242
+ ],
243
+ label="Select columns to show",
244
+ elem_id="column-select",
245
+ interactive=True,
246
+ )
247
+ with gr.Row():
248
+ deleted_models_visibility = gr.Checkbox(
249
+ value=False, label="Show gated/private/deleted models", interactive=True
250
+ )
251
+ with gr.Column(min_width=320):
252
+ #with gr.Box(elem_id="box-filter"):
253
+ # filter_columns_type = gr.CheckboxGroup(
254
+ # label="Model types",
255
+ # choices=[t.to_str() for t in utils.ModelType],
256
+ # value=[t.to_str() for t in utils.ModelType],
257
+ # interactive=True,
258
+ # elem_id="filter-columns-type",
259
+ # )
260
+ filter_columns_precision = gr.CheckboxGroup(
261
+ label="Precision",
262
+ choices=[i.value.name for i in utils.Precision],
263
+ value=[i.value.name for i in utils.Precision],
264
  interactive=True,
265
+ elem_id="filter-columns-precision",
266
  )
267
+ filter_columns_size = gr.CheckboxGroup(
268
+ label="Model sizes (in billions of parameters)",
269
+ choices=list(utils.NUMERIC_INTERVALS.keys()),
270
+ value=list(utils.NUMERIC_INTERVALS.keys()),
271
+ interactive=True,
272
+ elem_id="filter-columns-size",
273
  )
274
 
275
+ leaderboard_table = gr.components.Dataframe(
276
+ value=leaderboard_df[
277
+ [c.name for c in utils.fields(utils.AutoEvalColumn) if c.never_hidden]
278
+ + shown_columns.value
279
+ + [utils.AutoEvalColumn.dummy.name]
280
+ ].sort_values(by="Overall Humanlike %", ascending=False),
281
+ headers=[c.name for c in utils.fields(utils.AutoEvalColumn) if c.never_hidden] + shown_columns.value,
282
+ datatype=utils.TYPES,
283
+ elem_id="leaderboard-table",
284
+ interactive=False,
285
+ visible=True,
286
+ column_widths=["33%", "33%"]
287
+ )
288
 
289
+ # Dummy leaderboard for handling the case when the user uses backspace key
290
+ hidden_leaderboard_table_for_search = gr.components.Dataframe(
291
+ value=original_df[utils.COLS],
292
+ headers=utils.COLS,
293
+ datatype=utils.TYPES,
294
+ visible=False,
295
+ )
296
+ search_bar.submit(
297
  update_table,
298
  [
299
  hidden_leaderboard_table_for_search,
 
305
  search_bar,
306
  ],
307
  leaderboard_table,
 
308
  )
309
+ # for selector in [shown_columns, filter_columns_type, filter_columns_precision, filter_columns_size, deleted_models_visibility]:
310
+ for selector in [shown_columns, filter_columns_precision, filter_columns_size, deleted_models_visibility]:
311
+ selector.change(
312
+ update_table,
313
+ [
314
+ hidden_leaderboard_table_for_search,
315
+ shown_columns,
316
+ #filter_columns_type,
317
+ filter_columns_precision,
318
+ filter_columns_size,
319
+ deleted_models_visibility,
320
+ search_bar,
321
+ ],
322
+ leaderboard_table,
323
+ queue=True,
324
  )
325
 
326
+ with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
327
+ gr.Markdown(about.LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
 
328
 
329
+ with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
330
+ with gr.Column():
331
+ with gr.Row():
332
+ gr.Markdown(about.EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
333
+
334
+ with gr.Column():
335
+ with gr.Accordion(
336
+ f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
337
+ open=False,
338
+ ):
339
+ with gr.Row():
340
+ finished_eval_table = gr.components.Dataframe(
341
+ value=finished_eval_queue_df,
342
+ headers=utils.EVAL_COLS,
343
+ datatype=utils.EVAL_TYPES,
344
+ row_count=5,
345
+ )
346
+ with gr.Accordion(
347
+ f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
348
+ open=False,
349
+ ):
350
+ with gr.Row():
351
+ running_eval_table = gr.components.Dataframe(
352
+ value=running_eval_queue_df,
353
+ headers=utils.EVAL_COLS,
354
+ datatype=utils.EVAL_TYPES,
355
+ row_count=5,
356
+ )
357
+
358
+ with gr.Accordion(
359
+ f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
360
+ open=False,
361
+ ):
362
+ with gr.Row():
363
+ pending_eval_table = gr.components.Dataframe(
364
+ value=pending_eval_queue_df,
365
+ headers=utils.EVAL_COLS,
366
+ datatype=utils.EVAL_TYPES,
367
+ row_count=5,
368
+ )
369
+ with gr.Row():
370
+ gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
371
 
372
+ with gr.Row():
373
+ with gr.Column():
374
+ model_name_textbox = gr.Textbox(label="Model name")
375
+ revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
376
+ model_type = gr.Dropdown(
377
+ choices=[t.to_str(" : ") for t in utils.ModelType if t != utils.ModelType.Unknown],
378
+ label="Model type",
379
+ multiselect=False,
380
+ value=None,
381
+ interactive=True,
382
+ )
383
 
384
+ with gr.Column():
385
+ precision = gr.Dropdown(
386
+ choices=[i.value.name for i in utils.Precision if i != utils.Precision.Unknown],
387
+ label="Precision",
388
+ multiselect=False,
389
+ value="float16",
390
+ interactive=True,
391
+ )
392
+ weight_type = gr.Dropdown(
393
+ choices=[i.value.name for i in utils.WeightType],
394
+ label="Weights type",
395
+ multiselect=False,
396
+ value="Original",
397
+ interactive=True,
398
+ )
399
+ base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
400
 
401
+ submit_button = gr.Button("Submit Eval")
402
+ submission_result = gr.Markdown()
403
+ submit_button.click(
404
+ submit.add_new_eval,
405
+ [
406
+ model_name_textbox,
407
+ base_model_name_textbox,
408
+ revision_name_textbox,
409
+ precision,
410
+ weight_type,
411
+ model_type,
412
+ ],
413
+ submission_result,
414
+ )
415
 
416
+ with gr.Row():
417
+ with gr.Accordion("📙 Citation", open=False):
418
+ citation_button = gr.Textbox(
419
+ value=about.CITATION_BUTTON_TEXT,
420
+ label=about.CITATION_BUTTON_LABEL,
421
+ lines=20,
422
+ elem_id="citation-button",
423
+ show_copy_button=True,
424
+ )
425
+ except Exception as e:
426
+ print(e)
427
 
428
  (
429
  finished_eval_queue_df,
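The main structural change to app.py above is that the whole Gradio UI construction is now wrapped in a try/except, so an error raised while building the Blocks layout is printed instead of crashing the Space at import time. A minimal sketch of that pattern, assuming a custom_css string and an illustrative placeholder layout rather than the repo's actual components:

import gradio as gr

custom_css = ""  # stand-in for the Space's stylesheet

try:
    # Build the UI; any failure while constructing components is caught below.
    demo = gr.Blocks(css=custom_css)
    with demo:
        gr.Markdown("Leaderboard placeholder")
except Exception as e:
    # Surface the construction error in the logs rather than raising.
    print(e)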
src/backend/model_operations.py CHANGED
@@ -35,7 +35,7 @@ import spacy_transformers
35
  import subprocess
36
 
37
  # Run the command to download the spaCy model
38
- subprocess.run(["python", "-m", "spacy", "download", "en_core_web_lg"], check=True)
39
  # subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"], check=True)
40
  # subprocess.run(["pip", "install", "spacy-transformers"], check=True)
41
  # subprocess.run(["pip", "install", "curated-transformers"], check=True)
@@ -45,7 +45,7 @@ subprocess.run(["python", "-m", "spacy", "download", "en_core_web_lg"], check=Tr
45
  try:
46
  nlp1 = spacy.load("en_core_web_lg")
47
  except OSError:
48
- print("无法加载模型,继续执行其他处理。")
49
 
50
  # litellm.set_verbose=False
51
  litellm.set_verbose=True
@@ -537,6 +537,7 @@ class EvaluationModel:
537
  female_keyword = ["she", "her", "herself"]
538
  #print(len(responses_df["Experiment"]))
539
  for i in range(len(responses_df["Experiment"])):
 
540
  print(i, "/", len(responses_df["Experiment"]))
541
  # vote_1_1, vote_1_2, vote_1_3 = 0, 0, 0
542
  # print()
@@ -592,7 +593,6 @@ class EvaluationModel:
592
  output.append("Other")
593
  else:
594
  words = rs.split() # split the response into words
595
- output = []
596
  if any(word == word1 for word in words) and any(word == word2 for word in words):
597
  output.append("Other")
598
  else:
@@ -607,12 +607,41 @@ class EvaluationModel:
607
  else:
608
  output.append("Long")
609
  else:
610
- output.append("Other")
611
 
612
  '''Exp4'''
613
 
614
  elif responses_df["Experiment"][i] == "E4":
615
- filtered_lines = [r.split(':', 1)[-1].strip() if ':' in r else r for r in rs.split("\n")]
616
  filtered_lines = [r.split('-', 1)[-1].strip() if '-' in r else r for r in filtered_lines]
617
  rs = "\n".join(filtered_lines)
618
 
@@ -803,11 +832,8 @@ class EvaluationModel:
803
  output.append("NA")
804
  # print(output)
805
  # exit()
806
- '''human'''
807
- # self.data = pd.DataFrame(list(zip(responses_df["Experiment"], responses_df["Question_ID"], responses_df["Item"], responses_df["Response"], responses_df["Factor 2"], responses_df["Stimuli 1"], responses_df["Coding"], output)),
808
- # columns=["Experiment", "Question_ID", "Item", "Response", "Factor 2", "Simulate 1","Original_Coding","Coding"])
809
- '''LLM'''
810
- # print(len(output))
811
  self.data = pd.DataFrame(list(
812
  zip(responses_df["Experiment"], responses_df["Question_ID"], responses_df["Item"], responses_df["Response"],
813
  responses_df["Factor 2"], responses_df["Stimuli 1"], output)),
 
35
  import subprocess
36
 
37
  # Run the command to download the spaCy model
38
+ # subprocess.run(["python", "-m", "spacy", "download", "en_core_web_lg"], check=True)
39
  # subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"], check=True)
40
  # subprocess.run(["pip", "install", "spacy-transformers"], check=True)
41
  # subprocess.run(["pip", "install", "curated-transformers"], check=True)
 
45
  try:
46
  nlp1 = spacy.load("en_core_web_lg")
47
  except OSError:
48
+ print("Can not load spacy model")
49
 
50
  # litellm.set_verbose=False
51
  litellm.set_verbose=True
 
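With the unconditional spaCy download now commented out (line 38 above), spacy.load("en_core_web_lg") simply prints a message when the model is missing. Below is a hedged sketch of an alternative download-on-demand fallback; spacy.cli.download is spaCy's own helper, but whether runtime downloads suit this Space is an assumption, and this is not the code in the commit:

import spacy

try:
    nlp1 = spacy.load("en_core_web_lg")
except OSError:
    try:
        from spacy.cli import download
        download("en_core_web_lg")          # assumes network access at runtime
        nlp1 = spacy.load("en_core_web_lg")
    except Exception as e:
        print(f"Cannot load spaCy model: {e}")
        nlp1 = spacy.blank("en")             # degraded, non-crashing fallback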
537
  female_keyword = ["she", "her", "herself"]
538
  #print(len(responses_df["Experiment"]))
539
  for i in range(len(responses_df["Experiment"])):
540
+
541
  print(i, "/", len(responses_df["Experiment"]))
542
  # vote_1_1, vote_1_2, vote_1_3 = 0, 0, 0
543
  # print()
 
593
  output.append("Other")
594
  else:
595
  words = rs.split() # split the response into words
 
596
  if any(word == word1 for word in words) and any(word == word2 for word in words):
597
  output.append("Other")
598
  else:
 
607
  else:
608
  output.append("Long")
609
  else:
610
+ if len(words) > 1:
611
+ # join the words using " "
612
+ word = " ".join(words)
613
+ if word.lower() == word1.lower():
614
+ if len(word1) > len(word2):
615
+ output.append("Long")
616
+ else:
617
+ output.append("Short")
618
+ elif word.lower() == word2.lower():
619
+ if len(word1) > len(word2):
620
+ output.append("Short")
621
+ else:
622
+ output.append("Long")
623
+ else:
624
+ output.append("Other")
625
+ else:
626
+ output.append("Other")
627
+
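The branch added above (lines 610-626) handles responses that come back as a multi-word phrase rather than a single token: the tokens are rejoined with spaces, compared case-insensitively against the two candidate expressions, and coded "Long" or "Short" according to which candidate is longer, with everything else coded "Other". A self-contained sketch of that decision, assuming word1 and word2 are the trial's two candidate expressions (the helper name is illustrative):

def code_long_short(response_words, word1, word2):
    # Rejoin the tokens and compare the whole phrase against both candidates,
    # then label by the candidates' relative length.
    if len(response_words) <= 1:
        return "Other"
    phrase = " ".join(response_words)
    if phrase.lower() == word1.lower():
        return "Long" if len(word1) > len(word2) else "Short"
    if phrase.lower() == word2.lower():
        return "Short" if len(word1) > len(word2) else "Long"
    return "Other"

# e.g. code_long_short(["alarm", "clock"], "alarm clock", "clock") -> "Long"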
628
 
629
  '''Exp4'''
630
 
631
  elif responses_df["Experiment"][i] == "E4":
632
+ lines = rs.split("\n")
633
+ filtered_lines = []
634
+ if len(lines) > 1:
635
+ for r in lines[1:]:
636
+ if ':' in r:
637
+ filtered_lines.append(r.split(':', 1)[-1].strip())
638
+ else:
639
+ filtered_lines.append(r)
640
+ filtered_lines.insert(0, lines[0])
641
+ else:
642
+ filtered_lines = lines
643
+ print(filtered_lines)
644
+
645
  filtered_lines = [r.split('-', 1)[-1].strip() if '-' in r else r for r in filtered_lines]
646
  rs = "\n".join(filtered_lines)
647
 
 
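For E4 the response is now normalized line by line before coding: a "label:" prefix is stripped from every line except the first, then anything up to the first "-" is stripped from every line, and the lines are rejoined. A compact sketch of the same clean-up, with an illustrative helper name and sample string:

def normalize_e4_response(rs):
    # Keep the first line's "label:" intact, drop "label:" prefixes from later
    # lines, then drop "x - " style prefixes from all lines and rejoin.
    lines = rs.split("\n")
    if len(lines) > 1:
        cleaned = [lines[0]] + [r.split(':', 1)[-1].strip() if ':' in r else r
                                for r in lines[1:]]
    else:
        cleaned = lines
    cleaned = [r.split('-', 1)[-1].strip() if '-' in r else r for r in cleaned]
    return "\n".join(cleaned)

# e.g. normalize_e4_response("Answer\n1: cat - animal\n2: dog") -> "Answer\nanimal\ndog"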
832
  output.append("NA")
833
  # print(output)
834
  # exit()
835
+ '''LLM'''
836
+ print(len(output))
837
  self.data = pd.DataFrame(list(
838
  zip(responses_df["Experiment"], responses_df["Question_ID"], responses_df["Item"], responses_df["Response"],
839
  responses_df["Factor 2"], responses_df["Stimuli 1"], output)),