JeffYang52415 committed
Commit 0450c4e
1 Parent(s): a6c5f53

refactor: remove system prompt

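For reviewers, a minimal usage sketch of the interface after this change (illustrative only, not part of the commit; it assumes the dataset has already been loaded the same way as in the `__main__` examples further down): parsers no longer accept a `system_prompt` argument, and parsed entries expose `question` instead of `prompt`.

```python
from llmdataparser.gsm8k_parser import GSM8KDatasetParser

parser = GSM8KDatasetParser()      # __init__ no longer takes system_prompt
# ... load the "main" split here, as before (loading call omitted in this sketch) ...
parser.parse()

entry = parser.get_parsed_data[0]  # property access, as in the __main__ blocks below
print(entry.question)              # plain question text; replaces the old entry.prompt
print(entry.answer)
```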
.gitignore CHANGED
@@ -38,3 +38,7 @@ dist/
 #notebook cache
 .ipynb_checkpoints/
 notebooks/
+
+#coverage
+.coverage
+.coverage.*
llmdataparser/base_parser.py CHANGED
@@ -25,7 +25,7 @@ VALID_CATEGORIES = {
 class ParseEntry:
     """A simple base class for entries, customizable by each dataset parser."""
 
-    prompt: str
+    question: str
     answer: str
     raw_question: str
     raw_answer: str
@@ -166,18 +166,14 @@ class HuggingFaceDatasetParser(DatasetParser[T]):
     _task_names: ClassVar[list[str]]
     # _default_task is the default task to use if no task is specified, e.g. "algebra"
     _default_task: ClassVar[str]
-    # _default_system_prompt is the default system prompt to use if no system prompt is specified
-    _default_system_prompt: ClassVar[str]
     # _hidden_task_names is the list of task names that are hidden in the dataset, e.g. ["math", "physics", "chemistry"]
     _hidden_task_names: ClassVar[list[str]] = []
 
-    def __init__(self, system_prompt: str | None = None, **kwargs: Any) -> None:
+    def __init__(self, **kwargs: Any) -> None:
         """
         Initialize a HuggingFaceDatasetParser.
 
         Args:
-            system_prompt: Optional custom system prompt to use instead of the default.
-                If not provided, will use the class's _default_system_prompt.
             **kwargs: Additional keyword arguments passed to the parent class.
         """
         super().__init__()
@@ -187,8 +183,6 @@ class HuggingFaceDatasetParser(DatasetParser[T]):
         self.split_names: list[str] = []
         # _current_task is the task currently being processed, e.g. "algebra"
         self._current_task: str = ""
-        # _system_prompt is the system prompt currently being used
-        self._system_prompt: str = system_prompt or self._default_system_prompt
 
     def _get_current_task(self, data_entry: dict[str, Any] | None = None) -> str:
         """
llmdataparser/bbh_parser.py CHANGED
@@ -7,7 +7,6 @@ from llmdataparser.base_parser import (
     HuggingFaceDatasetParser,
     HuggingFaceParseEntry,
 )
-from llmdataparser.prompts import BBH_SYSTEM_PROMPT  # You'll need to create this
 
 
 @dataclass(frozen=True, kw_only=True, slots=True)
@@ -17,14 +16,14 @@ class BBHParseEntry(HuggingFaceParseEntry):
     @classmethod
     def create(
         cls,
-        prompt: str,
+        question: str,
         answer: str,
         raw_question: str,
         raw_answer: str,
         task_name: str,
     ) -> "BBHParseEntry":
         return cls(
-            prompt=prompt,
+            question=question,
             answer=answer,
             raw_question=raw_question,
             raw_answer=raw_answer,
@@ -66,7 +65,6 @@ class BBHDatasetParser(HuggingFaceDatasetParser[BBHParseEntry]):
         "word_sorting",
     ]
     _default_task: ClassVar[str] = "reasoning_about_colored_objects"
-    _default_system_prompt: ClassVar[str] = BBH_SYSTEM_PROMPT
 
     def process_entry(
         self, row: dict[str, Any], task_name: str | None = None, **kwargs: Any
@@ -78,14 +76,13 @@ class BBHDatasetParser(HuggingFaceDatasetParser[BBHParseEntry]):
         # Remove parentheses from the answer
         clean_answer = raw_answer.strip("()")
 
-        # Combine system prompt with the question
-        prompt = f"{self._system_prompt}\n\n{raw_question}"
+        question = str(raw_question)
 
         # Use task_name if provided, otherwise use default
         task = task_name or self._get_current_task(row)
 
         return BBHParseEntry.create(
-            prompt=prompt,
+            question=question,
             answer=clean_answer,
             raw_question=raw_question,
             raw_answer=raw_answer,
@@ -176,5 +173,5 @@ if __name__ == "__main__":
     example = parsed_data[0]
     print("\nExample parsed entry:")
     print(f"Task: {example.task_name}")
-    print(f"Question: {example.raw_question}")
+    print(f"Question: {example.question}")
     print(f"Answer: {example.answer}")
llmdataparser/gsm8k_parser.py CHANGED
@@ -7,7 +7,6 @@ from llmdataparser.base_parser import (
     HuggingFaceDatasetParser,
     HuggingFaceParseEntry,
 )
-from llmdataparser.prompts import GSM8K_SYSTEM_PROMPT
 
 
 @dataclass(frozen=True, kw_only=True, slots=True)
@@ -21,7 +20,7 @@ class GSM8KParseEntry(HuggingFaceParseEntry):
     @classmethod
     def create(
         cls,
-        prompt: str,
+        question: str,
         answer: str,
         raw_question: str,
         raw_answer: str,
@@ -30,7 +29,7 @@ class GSM8KParseEntry(HuggingFaceParseEntry):
         task_name: str,
     ) -> "GSM8KParseEntry":
         return cls(
-            prompt=prompt,
+            question=question,
             answer=answer,
             raw_question=raw_question,
             raw_answer=raw_answer,
@@ -46,7 +45,6 @@ class GSM8KDatasetParser(HuggingFaceDatasetParser[GSM8KParseEntry]):
     _data_source: ClassVar[str] = "openai/gsm8k"
     _task_names: ClassVar[list[str]] = ["main", "socratic"]
     _default_task: ClassVar[str] = "main"
-    _default_system_prompt: ClassVar[str] = GSM8K_SYSTEM_PROMPT
 
     def process_entry(
         self, row: dict[str, Any], task_name: str | None = None, **kwargs: Any
@@ -69,10 +67,10 @@ class GSM8KDatasetParser(HuggingFaceDatasetParser[GSM8KParseEntry]):
         # Extract solution (everything before '####')
         solution = raw_answer.split("####")[0].strip()
 
-        prompt = f"{self._system_prompt}\n{raw_question}"
+        question = str(raw_question)
 
         return GSM8KParseEntry.create(
-            prompt=prompt,
+            question=question,
             answer=str(numerical_answer),
             raw_question=raw_question,
             raw_answer=raw_answer,
@@ -145,7 +143,7 @@ if __name__ == "__main__":
     parser.parse()
 
     parsed_data = parser.get_parsed_data
-    pprint(parsed_data[0].prompt)
+    pprint(parsed_data[0].question)
    pprint(parsed_data[0].answer)
    pprint(parsed_data[0].raw_question)
    pprint(parsed_data[0].raw_answer)
llmdataparser/humaneval_parser.py CHANGED
@@ -7,7 +7,6 @@ from llmdataparser.base_parser import (
     HuggingFaceDatasetParser,
     HuggingFaceParseEntry,
 )
-from llmdataparser.prompts import HUMANEVAL_SYSTEM_PROMPT
 
 
 @dataclass(frozen=True, kw_only=True, slots=True)
@@ -22,7 +21,7 @@ class HumanEvalParseEntry(HuggingFaceParseEntry):
     @classmethod
     def create(
         cls,
-        prompt: str,
+        question: str,
         answer: str,
         raw_question: str,
         task_id: str,
@@ -35,7 +34,7 @@ class HumanEvalParseEntry(HuggingFaceParseEntry):
         if not entry_point:
             raise ValueError("Entry point cannot be empty")
         return cls(
-            prompt=prompt,
+            question=question,
             answer=answer,
             raw_question=raw_question,
             raw_answer=answer,  # In HumanEval, the canonical solution is the raw answer
@@ -52,7 +51,6 @@ class HumanEvalDatasetParser(HuggingFaceDatasetParser[HumanEvalParseEntry]):
     _data_source: ClassVar[str] = "openai/openai_humaneval"
     _default_task: ClassVar[str] = "openai_humaneval"
     _task_names: ClassVar[list[str]] = ["openai_humaneval"]
-    _default_system_prompt: ClassVar[str] = HUMANEVAL_SYSTEM_PROMPT
 
     def process_entry(
         self, row: dict[str, Any], task_name: str | None = None, **kwargs: Any
@@ -64,14 +62,13 @@ class HumanEvalDatasetParser(HuggingFaceDatasetParser[HumanEvalParseEntry]):
         entry_point = row["entry_point"]
         test = row["test"]
 
-        # Combine system prompt with the function signature and docstring
-        prompt = f"{self._system_prompt}\n\n{raw_question}"
+        question = str(raw_question)
 
         # Use task_name if provided, otherwise use default
         task = task_name or self._get_current_task(row)
 
         return HumanEvalParseEntry.create(
-            prompt=prompt,
+            question=question,
             answer=answer,
             raw_question=raw_question,
             task_id=task_id,
@@ -151,7 +148,6 @@ class HumanEvalDatasetPlusParser(HumanEvalDatasetParser):
     _data_source: ClassVar[str] = "evalplus/humanevalplus"
     _default_task: ClassVar[str] = "default"
     _task_names: ClassVar[list[str]] = ["default"]
-    _default_system_prompt: ClassVar[str] = HUMANEVAL_SYSTEM_PROMPT
 
     def process_entry(
         self, row: dict[str, Any], task_name: str | None = None, **kwargs: Any
@@ -163,14 +159,12 @@ class HumanEvalDatasetPlusParser(HumanEvalDatasetParser):
         entry_point = row["entry_point"]
         test = row["test"]
 
-        # Combine system prompt with the function signature and docstring
-        prompt = f"{self._system_prompt}\n\n{raw_question}"
-
+        question = str(raw_question)
         # Use task_name if provided, otherwise use default
         task = task_name or self._get_current_task(row)
 
         return HumanEvalParseEntry.create(
-            prompt=prompt,
+            question=question,
             answer=answer,
             raw_question=raw_question,
             task_id=task_id,
@@ -264,7 +258,7 @@ if __name__ == "__main__":
     print("\nExample parsed entry:")
     print(f"Task ID: {example.task_id}")
     print(f"Entry Point: {example.entry_point}")
-    print(f"Prompt:\n{example.prompt}")
+    print(f"Question:\n{example.question}")
     print(f"Solution:\n{example.answer}")
 
     parser = HumanEvalDatasetPlusParser()
llmdataparser/ifeval_parser.py CHANGED
@@ -7,7 +7,6 @@ from llmdataparser.base_parser import (
     HuggingFaceDatasetParser,
     HuggingFaceParseEntry,
 )
-from llmdataparser.prompts import IFEVAL_SYSTEM_PROMPT  # You'll need to create this
 
 
 @dataclass(frozen=True, kw_only=True, slots=True)
@@ -21,7 +20,7 @@ class IFEvalParseEntry(HuggingFaceParseEntry):
     @classmethod
     def create(
         cls,
-        prompt: str,
+        question: str,
         answer: str,
         raw_question: str,
         raw_answer: str,
@@ -31,7 +30,7 @@ class IFEvalParseEntry(HuggingFaceParseEntry):
         task_name: str,
     ) -> "IFEvalParseEntry":
         return cls(
-            prompt=prompt,
+            question=question,
             answer=answer,
             raw_question=raw_question,
             raw_answer=raw_answer,
@@ -48,7 +47,6 @@ class IFEvalDatasetParser(HuggingFaceDatasetParser[IFEvalParseEntry]):
     _data_source: ClassVar[str] = "google/IFEval"
     _default_task: ClassVar[str] = "default"
     _task_names: ClassVar[list[str]] = ["default"]
-    _default_system_prompt: ClassVar[str] = IFEVAL_SYSTEM_PROMPT
 
     def process_entry(
         self, row: dict[str, Any], task_name: str | None = None, **kwargs: Any
@@ -65,14 +63,13 @@ class IFEvalDatasetParser(HuggingFaceDatasetParser[IFEvalParseEntry]):
         answer = ""
         raw_answer = ""
 
-        # Combine system prompt with the instruction prompt
-        prompt = f"{self._system_prompt}\n\n{raw_question}"
+        question = str(raw_question)
 
         # Use task_name if provided, otherwise use default
         task = task_name or self._get_current_task(row)
 
         return IFEvalParseEntry.create(
-            prompt=prompt,
+            question=question,
             answer=answer,
             raw_question=raw_question,
             raw_answer=raw_answer,
@@ -162,6 +159,6 @@ if __name__ == "__main__":
     example = parsed_data[0]
     print("\nExample parsed entry:")
     print(f"Key: {example.key}")
-    print(f"Prompt: {example.prompt}")
+    print(f"Question: {example.question}")
     print(f"Instruction IDs: {example.instruction_id_list}")
     print(f"kwargs: {example.kwargs}")
llmdataparser/math_parser.py CHANGED
@@ -20,7 +20,7 @@ class MATHParseEntry(HuggingFaceParseEntry):
     @classmethod
     def create(
         cls,
-        prompt: str,
+        question: str,
         answer: str,
         raw_question: str,
         raw_answer: str,
@@ -29,7 +29,7 @@ class MATHParseEntry(HuggingFaceParseEntry):
         solution: str,
     ) -> "MATHParseEntry":
         return cls(
-            prompt=prompt,
+            question=question,
             answer=answer,
             raw_question=raw_question,
             raw_answer=raw_answer,
@@ -54,9 +54,7 @@ class MATHDatasetParser(HuggingFaceDatasetParser[MATHParseEntry]):
         "all",
     ]
     _default_task: ClassVar[str] = "all"
-    _default_system_prompt: ClassVar[str] = (
-        "Solve the following mathematics problem step by step:"
-    )
+
     _valid_levels: ClassVar[set[str]] = {
         f"Level {i}" for i in range(1, 6)
     }  # Levels 1-5 are valid
@@ -80,7 +78,7 @@ class MATHDatasetParser(HuggingFaceDatasetParser[MATHParseEntry]):
             level = "Unknown"
 
         return MATHParseEntry.create(
-            prompt=f"{self._system_prompt}\n{row['problem']}",
+            question=str(row["problem"]),
            answer=row["solution"],
            raw_question=row["problem"],
            raw_answer=row["solution"],
@@ -187,5 +185,5 @@ if __name__ == "__main__":
     print("\nExample parsed entry:")
     print(f"Task: {example.task_name}")
     print(f"Level: {example.level}")
-    print(f"Question: {example.raw_question}")
+    print(f"Question: {example.question}")
     print(f"Solution: {example.solution}")
llmdataparser/mbpp_parser.py CHANGED
@@ -7,7 +7,6 @@ from llmdataparser.base_parser import (
     HuggingFaceDatasetParser,
     HuggingFaceParseEntry,
 )
-from llmdataparser.prompts import MBPP_SYSTEM_PROMPT
 
 
 @dataclass(frozen=True, kw_only=True, slots=True)
@@ -23,7 +22,7 @@ class MBPPParseEntry(HuggingFaceParseEntry):
     @classmethod
     def create(
         cls,
-        prompt: str,
+        question: str,
         answer: str,
         raw_question: str,
         task_id: int,
@@ -37,7 +36,7 @@ class MBPPParseEntry(HuggingFaceParseEntry):
             raise ValueError("Task ID must be an integer")
 
         return cls(
-            prompt=prompt,
+            question=question,
             answer=answer,
             raw_question=raw_question,
             raw_answer=answer,  # In MBPP, the code solution is the raw answer
@@ -56,7 +55,6 @@ class MBPPDatasetParser(HuggingFaceDatasetParser[MBPPParseEntry]):
     _data_source: ClassVar[str] = "google-research-datasets/mbpp"
     _default_task: ClassVar[str] = "full"  # Can be 'full' or 'sanitized'
     _task_names: ClassVar[list[str]] = ["full", "sanitized"]
-    _default_system_prompt: ClassVar[str] = MBPP_SYSTEM_PROMPT
 
     def process_entry(
         self, row: dict[str, Any], task_name: str | None = None, **kwargs: Any
@@ -69,15 +67,14 @@ class MBPPDatasetParser(HuggingFaceDatasetParser[MBPPParseEntry]):
         test_setup_code = row.get("test_setup_code", "")
         challenge_test_list = row.get("challenge_test_list", [])
 
-        # Combine system prompt with the task description
-        prompt = f"{self._system_prompt}\n\nTask: {raw_question}"
+        question = str(raw_question)
 
         # Use task_name if provided, otherwise use default
         task = task_name or self._get_current_task(row)
         source_file = row.get("source_file", "")
 
         return MBPPParseEntry.create(
-            prompt=prompt,
+            question=question,
             answer=answer,
             raw_question=raw_question,
             task_id=task_id,
llmdataparser/mgsm_parser.py CHANGED
@@ -7,7 +7,6 @@ from llmdataparser.base_parser import (
     HuggingFaceDatasetParser,
     HuggingFaceParseEntry,
 )
-from llmdataparser.prompts import MGSM_SYSTEM_PROMPT
 
 
 @dataclass(frozen=True, kw_only=True, slots=True)
@@ -21,7 +20,7 @@ class MGSMParseEntry(HuggingFaceParseEntry):
     @classmethod
     def create(
         cls,
-        prompt: str,
+        question: str,
         answer: str,
         raw_question: str,
         raw_answer: str,
@@ -31,7 +30,7 @@ class MGSMParseEntry(HuggingFaceParseEntry):
         language: str,
     ) -> "MGSMParseEntry":
         return cls(
-            prompt=prompt,
+            question=question,
             answer=answer,
             raw_question=raw_question,
             raw_answer=raw_answer,
@@ -60,7 +59,6 @@ class MGSMDatasetParser(HuggingFaceDatasetParser[MGSMParseEntry]):
         "th",
         "zh",
     ]
-    _default_system_prompt: ClassVar[str] = MGSM_SYSTEM_PROMPT
 
     def process_entry(
         self, row: dict[str, Any], task_name: str | None = None, **kwargs: Any
@@ -73,7 +71,7 @@ class MGSMDatasetParser(HuggingFaceDatasetParser[MGSMParseEntry]):
             task_name: Language code for the current task
 
         Returns:
-            MGSMParseEntry: Processed entry with prompt, answer, and metadata
+            MGSMParseEntry: Processed entry with question, answer, and metadata
         """
         task = task_name or self._get_current_task(row)
         raw_question = row["question"]
@@ -81,14 +79,13 @@ class MGSMDatasetParser(HuggingFaceDatasetParser[MGSMParseEntry]):
         numerical_answer = row["answer_number"]
         equation_solution = row["equation_solution"]
 
-        # Construct the prompt with the system prompt and question
-        prompt = f"{self._system_prompt}\n{raw_question}"
+        question = str(raw_question)
 
         # Use numerical answer as string for the answer field if no detailed answer is provided
         answer = raw_answer if raw_answer else str(numerical_answer)
 
         return MGSMParseEntry.create(
-            prompt=prompt,
+            question=question,
             answer=answer,
             raw_question=raw_question,
             raw_answer=raw_answer,
@@ -188,7 +185,7 @@ if __name__ == "__main__":
     parser.parse()
 
     parsed_data = parser.get_parsed_data
-    pprint(parsed_data[0].prompt)
+    pprint(parsed_data[0].question)
    pprint(parsed_data[0].answer)
    pprint(parsed_data[0].raw_question)
    pprint(parsed_data[0].numerical_answer)
llmdataparser/mmlu_parser.py CHANGED
@@ -7,7 +7,6 @@ from llmdataparser.base_parser import (
     HuggingFaceDatasetParser,
     HuggingFaceParseEntry,
 )
-from llmdataparser.prompts import MMLU_PRO_SYSTEM_PROMPT, MMLU_SYSTEM_PROMPT
 
 MMLU_VALID_ANSWERS: Final[set[str]] = {"A", "B", "C", "D"}
 MMLU_PRO_VALID_ANSWERS: Final[set[str]] = {
@@ -36,7 +35,7 @@ class MMLUParseEntry(HuggingFaceParseEntry):
     @classmethod
     def create(
         cls,
-        prompt: str,
+        question: str,
         answer: str,
         raw_question: str,
         raw_choices: list[str],
@@ -50,7 +49,7 @@ class MMLUParseEntry(HuggingFaceParseEntry):
         if not task_name:
             raise ValueError("Task name cannot be empty")
         return cls(
-            prompt=prompt,
+            question=question,
             answer=answer,
             raw_question=raw_question,
             raw_answer=raw_answer,
@@ -69,7 +68,7 @@ class MMLUProParseEntry(HuggingFaceParseEntry):
     @classmethod
     def create(
         cls,
-        prompt: str,
+        question: str,
         answer: str,
         raw_question: str,
         raw_choices: list[str],
@@ -83,7 +82,7 @@ class MMLUProParseEntry(HuggingFaceParseEntry):
         if not task_name:
             raise ValueError("Task name cannot be empty")
         return cls(
-            prompt=prompt,
+            question=question,
             answer=answer,
             raw_question=raw_question,
             raw_choices=raw_choices,
@@ -95,8 +94,6 @@ class MMLUProParseEntry(HuggingFaceParseEntry):
 class MMLUDatasetParser(HuggingFaceDatasetParser[MMLUParseEntry]):
     """Base class for MMLU dataset parsers with common functionality."""
 
-    _default_system_prompt = MMLU_SYSTEM_PROMPT
-
     def _get_task_from_entry(self, data_entry: dict[str, Any]) -> str:
         """Get the task name from the data entry or default task name."""
         task_name: str = data_entry.get("subject", "")
@@ -106,7 +103,7 @@ class MMLUDatasetParser(HuggingFaceDatasetParser[MMLUParseEntry]):
         self, row: dict[str, Any], task_name: str | None = None, **kwargs: Any
     ) -> MMLUParseEntry:
         """
-        Generate a prompt and expected answer from the given row.
+        Generate a question and expected answer from the given row.
 
         Args:
             row: A data point to be formatted.
@@ -127,11 +124,11 @@ class MMLUDatasetParser(HuggingFaceDatasetParser[MMLUParseEntry]):
         raw_choices = row["choices"]
         raw_answer = str(row["answer"])  # Ensure raw_answer is a string
 
-        prompt = f"{self._system_prompt}\nQuestion: {raw_question}\n{choices}\nAnswer:"
+        question = f"Question: {raw_question}\n{choices}\nAnswer:"
         answer_letter = chr(65 + int(raw_answer))  # Convert index to 'A', 'B', 'C', 'D'
 
         return MMLUParseEntry.create(
-            prompt=prompt,
+            question=question,
             answer=answer_letter,
             raw_question=raw_question,
             raw_choices=raw_choices,
@@ -482,11 +479,11 @@ class TMMLUPlusDatasetParser(MMLUDatasetParser):
         raw_question = row["question"]
         raw_answer = row["answer"]
 
-        prompt = f"{self._system_prompt}\nQuestion: {raw_question}\n{choices}\nAnswer:"
+        question = f"Question: {raw_question}\n{choices}\nAnswer:"
         task = task_name or self._get_current_task(row)
 
         return MMLUParseEntry.create(
-            prompt, raw_answer, raw_question, raw_choices, raw_answer, task
+            question, raw_answer, raw_question, raw_choices, raw_answer, task
         )
 
     def get_dataset_description(self) -> DatasetDescription:
@@ -572,7 +569,6 @@ class MMLUProDatasetParser(HuggingFaceDatasetParser[MMLUProParseEntry]):
         "computer_science",
         "history",
     ]
-    _default_system_prompt = MMLU_PRO_SYSTEM_PROMPT
 
     def _get_task_from_entry(self, data_entry: dict[str, Any]) -> str:
         """Get the task name from the data entry or default task name."""
@@ -586,7 +582,7 @@ class MMLUProDatasetParser(HuggingFaceDatasetParser[MMLUProParseEntry]):
         self, row: dict[str, Any], task_name: str | None = None, **kwargs: Any
     ) -> MMLUProParseEntry:
         """
-        Generate a prompt and expected answer from the given row.
+        Generate a question and expected answer from the given row.
 
         Args:
             row (dict[str, Any]): A data point to be formatted with MMLU Pro specific structure
@@ -608,13 +604,13 @@ class MMLUProDatasetParser(HuggingFaceDatasetParser[MMLUProParseEntry]):
         raw_answer = row["answer"]
         answer_index = row["answer_index"]
 
-        prompt = f"{self._system_prompt}\nQuestion: {raw_question}\n{choices}\nAnswer:"
+        question = f"Question: {raw_question}\n{choices}\nAnswer:"
         answer_letter = chr(
             65 + answer_index
         )  # Convert index to 'A', 'B', 'C', 'D', etc.
 
         return MMLUProParseEntry.create(
-            prompt, answer_letter, raw_question, raw_choices, raw_answer, final_task
+            question, answer_letter, raw_question, raw_choices, raw_answer, final_task
         )
 
     def get_dataset_description(self) -> DatasetDescription:
llmdataparser/prompts.py CHANGED
@@ -3,164 +3,65 @@ from typing import Final
 
 MMLU_SYSTEM_PROMPT: Final[str] = textwrap.dedent(
     """\
-    You are a highly knowledgeable expert tasked with answering multiple-choice questions across various academic and professional fields. Each question has four options (A, B, C, D). Your goal is to select the single most accurate answer based on factual knowledge.
-
-    Instructions:
-    1. Carefully analyze the question and all answer options
-    2. Consider only verified, factual information
-    3. Select the most precise and accurate option
-    4. Respond with ONLY the letter (A, B, C, or D) - no explanations or additional text
+    You are an expert answering multiple-choice questions. Select the single most accurate answer (A, B, C, or D) based on factual knowledge. Respond with the letter only.
     """
 )
 
 MMLU_PRO_SYSTEM_PROMPT: Final[str] = textwrap.dedent(
     """\
-    You are a highly knowledgeable expert tasked with answering multiple-choice questions across various academic and professional fields. Each question has ten options (A through J). Your goal is to select the single most accurate answer based on factual knowledge.
-
-    Instructions:
-    1. Carefully analyze the question and all answer options
-    2. Consider only verified, factual information
-    3. Select the most precise and accurate option
-    4. Respond with ONLY the letter (A through J) - no explanations or additional text
+    You are an expert answering multiple-choice questions. Select the single most accurate answer (A through J) based on factual knowledge. Respond with the letter only.
     """
 )
 
 GSM8K_SYSTEM_PROMPT: Final[str] = textwrap.dedent(
     """\
-    You are an expert mathematics tutor. Your task is to solve math word problems by breaking them down into clear, logical steps.
-
-    Instructions:
-    1. Read the problem carefully
-    2. Show your step-by-step reasoning
-    3. Ensure each step is clear and mathematically sound
-    4. End with the final numerical answer
-    5. Format your response as:
-       Let's solve this step by step:
-       1) [First step]
-       2) [Second step]
-       ...
-       Therefore, the answer is [number]
+    Solve this math problem step by step:
+    1) Show your reasoning
+    2) End with "Therefore, the answer is [number]"
    """
 )
 
-
 HUMANEVAL_SYSTEM_PROMPT: Final[str] = textwrap.dedent(
     """\
-    You are an expert Python programmer tasked with implementing Python functions. Your goal is to write clean, efficient, and correct code that meets the specifications.
-
-    Instructions:
-    1. Read the function signature and docstring carefully
-    2. Implement only the function body, not the signature or docstring
-    3. Follow Python best practices and PEP 8 style guidelines
-    4. Write clear, readable code with appropriate variable names
-    5. Handle edge cases and input validation where necessary
-    6. Use type hints and ensure type safety
-    7. Optimize for both readability and performance
-    8. Add comments for complex logic or non-obvious implementations
-    9. Include appropriate error handling with specific exception types
-    10. Consider writing code that would be easy to test
-    11. Return only the implementation code, no additional text
-
-    Example of good implementation:
-    ```python
-    # Handle edge case of empty input
-    if not numbers:
-        raise ValueError("Input list cannot be empty")
-
-    # Use descriptive variable names and type hints
-    result: list[int] = sorted(numbers)
-    return result[len(result) // 2]  # Return median value
-    ```
+    Implement the Python function following best practices. Include error handling, type hints, and comments for complex logic. Return only the implementation code.
     """
 )
 
-MGSM_SYSTEM_PROMPT = textwrap.dedent(
+MGSM_SYSTEM_PROMPT: Final[str] = textwrap.dedent(
     """\
-    You are an expert mathematics tutor who can explain solutions in multiple languages. Your task is to solve math word problems by breaking them down into clear, logical steps.
-
-    Instructions:
-    1. Read the problem carefully
-    2. Show your step-by-step reasoning
-    3. Ensure each step is clear and mathematically sound
-    4. Use appropriate number formatting for the target language (e.g., decimal points vs. commas)
-    5. End with the final numerical answer
-    6. Format your response as:
-       Let's solve this step by step:
-       1) [First step]
-       2) [Second step]
-       ...
-       Therefore, the answer is [number]
+    Solve this math problem step by step in the specified language:
+    1) Show your reasoning
+    2) Use appropriate number formatting
+    3) End with "Therefore, the answer is [number]"
     """
 )
 
-
 IFEVAL_SYSTEM_PROMPT: Final[str] = textwrap.dedent(
     """\
-    You are a precise instruction follower. Your task is to generate responses that exactly match given requirements and constraints.
-
-    Instructions:
-    1. Read all requirements carefully
-    2. Follow formatting rules exactly
-    3. Meet all length requirements
-    4. Include all required elements
-    5. Avoid forbidden elements
-    6. Provide ONLY the requested output
+    Follow the given requirements exactly. Provide only the requested output.
     """
 )
 
 BBH_SYSTEM_PROMPT: Final[str] = textwrap.dedent(
     """\
-    You are a highly intelligent expert tasked with solving complex reasoning problems. These problems test various cognitive abilities including logical deduction, causal reasoning, mathematical thinking, and spatial understanding.
-
-    Instructions:
-    1. Read the entire problem carefully, including all given conditions and rules
-    2. Pay attention to the specific type of reasoning required (logical, temporal, spatial, etc.)
-    3. Consider all relationships and constraints mentioned in the problem
-    4. Apply structured thinking to reach a valid conclusion
-    5. Choose the answer that logically follows from the given information
-    6. Respond with ONLY the letter (A, B, C, etc.) or "True"/"False" or "Yes"/"No" and so on - no explanations or additional text
+    Solve this reasoning problem and respond with only the answer (letter, True/False, or Yes/No).
     """
 )
 
 MBPP_SYSTEM_PROMPT: Final[str] = textwrap.dedent(
     """\
-    You are an expert Python programmer tasked with solving basic programming problems. Your goal is to write clean, efficient, and well-tested Python code that solves the given task.
-
-    Instructions:
-    1. Read the task description carefully
-    2. Write a complete Python solution that solves the problem
-    3. Follow Python best practices and PEP 8 style guidelines
-    4. Write clear, readable code with descriptive variable names
-    5. Handle edge cases and input validation appropriately
-    6. Include docstrings or comments to explain complex logic
-    7. Focus on fundamental programming concepts and standard library usage
-    8. Optimize for readability and maintainability
-    9. Return only the implementation code, no additional text
+    Write clean, efficient Python code that solves the given task. Include docstrings and handle edge cases. Return only the implementation code.
     """
 )
 
 TW_LEGAL_SYSTEM_PROMPT: Final[str] = textwrap.dedent(
     """\
-    You are an expert lawyer with deep knowledge of Taiwan's legal system. For each question, you will analyze legal scenarios or concepts based on Taiwan's laws and regulations. Your task is to select the most appropriate answer that aligns with Taiwan's legal principles.
-
-    Instructions:
-    1. Carefully analyze the legal question and all options
-    2. Consider Taiwan's specific legal context and terminology
-    3. Apply relevant laws, regulations, and legal principles
-    4. Select the single most accurate answer
-    5. Respond with ONLY the letter (A, B, C, or D) - no explanations or additional text
+    As a Taiwan legal expert, select the most accurate answer (A, B, C, or D) based on Taiwan's laws. Respond with the letter only.
    """
 )
 
 TMLU_SYSTEM_PROMPT: Final[str] = textwrap.dedent(
     """\
-    You are an expert evaluator with deep knowledge of Taiwan's educational system and professional fields. For each question, analyze it carefully and select the most appropriate answer based on your understanding of the subject matter.
-
-    Instructions:
-    1. Carefully read and understand the question
-    2. Consider all answer options thoroughly
-    3. Apply subject-specific knowledge and reasoning
-    4. Select the single most accurate answer
-    5. Respond with ONLY the letter (A, B, C, or D) - no explanations or additional text
+    Select the most accurate answer (A, B, C, or D) based on Taiwan's educational and professional knowledge. Respond with the letter only.
    """
 )
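Since the parsers no longer prepend a system prompt to each question, a caller who still wants one can combine the shortened constants above with `entry.question` directly. A sketch under that assumption (it reuses `entry` from the usage sketch near the top of this diff and mirrors the f-string the parsers used to build):

```python
from llmdataparser.prompts import MMLU_SYSTEM_PROMPT

# For the MMLU-style parsers, entry.question already has the
# "Question: ...\n<choices>\nAnswer:" layout, so only the system
# prompt needs to be prepended by the caller.
full_prompt = f"{MMLU_SYSTEM_PROMPT}\n{entry.question}"
```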
llmdataparser/tmlu_parser.py CHANGED
@@ -7,7 +7,6 @@ from llmdataparser.base_parser import (
     HuggingFaceDatasetParser,
     HuggingFaceParseEntry,
 )
-from llmdataparser.prompts import TMLU_SYSTEM_PROMPT
 
 TMLU_VALID_ANSWERS: Final[set[str]] = {"A", "B", "C", "D"}
 TMLU_VALID_ANSWER_STR: Final[str] = ", ".join(sorted(TMLU_VALID_ANSWERS))
@@ -24,7 +23,7 @@ class TMLUParseEntry(HuggingFaceParseEntry):
     @classmethod
     def create(
         cls,
-        prompt: str,
+        question: str,
         answer: str,
         raw_question: str,
         raw_choices: list[str],
@@ -38,7 +37,7 @@ class TMLUParseEntry(HuggingFaceParseEntry):
                 f"Invalid answer_letter '{answer}'; must be one of {TMLU_VALID_ANSWER_STR}"
             )
         return cls(
-            prompt=prompt,
+            question=question,
             answer=answer,
             raw_question=raw_question,
             raw_answer=raw_answer,
@@ -93,7 +92,6 @@ class TMLUDatasetParser(HuggingFaceDatasetParser[TMLUParseEntry]):
         "teacher_qualification",
         "accountant",
     ]
-    _default_system_prompt = TMLU_SYSTEM_PROMPT
 
     def process_entry(
         self, row: dict[str, Any], task_name: str | None = None, **kwargs: Any
@@ -110,10 +108,10 @@ class TMLUDatasetParser(HuggingFaceDatasetParser[TMLUParseEntry]):
         explanation = row.get("explanation", "")
         metadata = row.get("metadata", {})
 
-        prompt = f"{self._system_prompt}\nQuestion: {raw_question}\n{choices}\nAnswer:"
+        question = f"Question: {raw_question}\n{choices}\nAnswer:"
 
         return TMLUParseEntry.create(
-            prompt=prompt,
+            question=question,
             answer=raw_answer,
             raw_question=raw_question,
             raw_choices=raw_choices,
@@ -187,7 +185,7 @@ if __name__ == "__main__":
     example = parsed_data[0]
     print("\nExample parsed entry:")
     print(f"Task: {example.task_name}")
-    print(f"Question: {example.raw_question}")
+    print(f"Question: {example.question}")
     print("Choices:")
     for i, choice in enumerate(example.raw_choices):
         print(f"{chr(65 + i)}. {choice}")
llmdataparser/tw_legal_parser.py CHANGED
@@ -7,7 +7,6 @@ from llmdataparser.base_parser import (
     HuggingFaceDatasetParser,
     HuggingFaceParseEntry,
 )
-from llmdataparser.prompts import TW_LEGAL_SYSTEM_PROMPT
 
 TW_LEGAL_VALID_ANSWERS: Final[set[str]] = {"A", "B", "C", "D"}
 TW_LEGAL_VALID_ANSWER_STR: Final[str] = ", ".join(sorted(TW_LEGAL_VALID_ANSWERS))
@@ -22,7 +21,7 @@ class TWLegalParseEntry(HuggingFaceParseEntry):
     @classmethod
     def create(
         cls,
-        prompt: str,
+        question: str,
         answer: str,
         raw_question: str,
         raw_choices: list[str],
@@ -34,7 +33,7 @@ class TWLegalParseEntry(HuggingFaceParseEntry):
                 f"Invalid answer_letter '{answer}'; must be one of {TW_LEGAL_VALID_ANSWER_STR}"
             )
         return cls(
-            prompt=prompt,
+            question=question,
             answer=answer,
             raw_question=raw_question,
             raw_answer=raw_answer,
@@ -49,7 +48,6 @@ class TWLegalDatasetParser(HuggingFaceDatasetParser[TWLegalParseEntry]):
     _data_source = "lianghsun/tw-legal-benchmark-v1"
     _default_task = "default"
     _task_names = ["default"]
-    _default_system_prompt = TW_LEGAL_SYSTEM_PROMPT
 
     def process_entry(
         self, row: dict[str, Any], task_name: str | None = None, **kwargs: Any
@@ -64,10 +62,10 @@ class TWLegalDatasetParser(HuggingFaceDatasetParser[TWLegalParseEntry]):
         raw_question = row["question"]
         raw_answer = row["answer"]
 
-        prompt = f"{self._system_prompt}\nQuestion: {raw_question}\n{choices}\nAnswer:"
+        question = f"Question: {raw_question}\n{choices}\nAnswer:"
 
         return TWLegalParseEntry.create(
-            prompt=prompt,
+            question=question,
             answer=raw_answer,
             raw_question=raw_question,
             raw_choices=raw_choices,
@@ -119,7 +117,7 @@ if __name__ == "__main__":
     if parsed_data:
         example = parsed_data[0]
         print("\nExample parsed entry:")
-        print(f"Question: {example.raw_question}")
+        print(f"Question: {example.question}")
         print("Choices:")
         for i, choice in enumerate(example.raw_choices):
             print(f"{chr(65 + i)}. {choice}")
tests/test_bbh_parser.py CHANGED
@@ -28,14 +28,14 @@ def sample_row():
 def test_bbh_parse_entry_creation_valid():
     """Test valid creation of BBHParseEntry."""
     entry = BBHParseEntry.create(
-        prompt="Test prompt",
+        question="Test question",
         answer="A",
         raw_question="Test question",
         raw_answer="(A)",
         task_name="reasoning_about_colored_objects",
     )
     assert isinstance(entry, BBHParseEntry)
-    assert entry.prompt == "Test prompt"
+    assert entry.question == "Test question"
     assert entry.answer == "A"
     assert entry.raw_question == "Test question"
     assert entry.raw_answer == "(A)"
@@ -76,7 +76,6 @@ def test_full_parse_workflow(loaded_bbh_parser):
     assert isinstance(first_entry, BBHParseEntry)
     assert first_entry.task_name == "reasoning_about_colored_objects"
     assert first_entry.answer.strip("()").isalpha()  # Should be a single letter
-    assert first_entry.prompt.startswith(loaded_bbh_parser._system_prompt)
 
 
 def test_process_entry(bbh_parser, sample_row):
@@ -87,9 +86,8 @@ def test_process_entry(bbh_parser, sample_row):
 
     assert isinstance(entry, BBHParseEntry)
     assert entry.answer == "A"  # Stripped from "(A)"
-    assert "What color is the sky" in entry.raw_question
+    assert "What color is the sky" in entry.question
     assert entry.raw_answer == "(A)"
-    assert bbh_parser._system_prompt in entry.prompt
     assert entry.task_name == "reasoning_about_colored_objects"
 
 
tests/test_gsm8k_parser.py CHANGED
@@ -30,7 +30,7 @@ def sample_row():
 def test_gsm8k_parse_entry_creation_valid():
     """Test valid creation of GSM8KParseEntry."""
     entry = GSM8KParseEntry.create(
-        prompt="Test prompt",
+        question="Test question",
         answer="5",
         raw_question="Test question",
         raw_answer="Solution steps #### 5",
@@ -39,7 +39,7 @@ def test_gsm8k_parse_entry_creation_valid():
         numerical_answer=5,
     )
     assert isinstance(entry, GSM8KParseEntry)
-    assert entry.prompt == "Test prompt"
+    assert entry.question == "Test question"
     assert entry.answer == "5"
     assert entry.solution == "Solution steps"
     assert entry.numerical_answer == 5
@@ -83,7 +83,6 @@ def test_full_parse_workflow(loaded_gsm8k_parser):
     assert isinstance(first_entry.numerical_answer, (str, int, float))
     assert "####" in first_entry.raw_answer
     assert first_entry.solution
-    assert first_entry.prompt.startswith(loaded_gsm8k_parser._system_prompt)
 
 
 def test_process_entry(gsm8k_parser, sample_row):
@@ -95,7 +94,6 @@ def test_process_entry(gsm8k_parser, sample_row):
     assert "Janet has 3 apples" in entry.raw_question
     assert "#### 5" in entry.raw_answer
     assert "Let's solve this step by step:" in entry.solution
-    assert gsm8k_parser._system_prompt in entry.prompt
     assert entry.task_name == "main"
 
 
tests/test_humaneval_parser.py CHANGED
@@ -42,7 +42,7 @@ def plus_sample_entry():
 def test_humaneval_parse_entry_creation():
     """Test creation of HumanEvalParseEntry"""
     entry = HumanEvalParseEntry.create(
-        prompt="test prompt",
+        question="test question",
         answer="test answer",
         raw_question="raw question",
         task_id="HumanEval/1",
@@ -51,7 +51,7 @@ def test_humaneval_parse_entry_creation():
         task_name="openai_humaneval",
     )
 
-    assert entry.prompt == "test prompt"
+    assert entry.question == "test question"
     assert entry.answer == "test answer"
     assert entry.raw_question == "raw question"
     assert entry.raw_answer == "test answer"  # Should match answer
@@ -65,7 +65,7 @@ def test_humaneval_parse_entry_validation():
     """Test validation of required fields"""
     with pytest.raises(ValueError, match="Task ID cannot be empty"):
         HumanEvalParseEntry.create(
-            prompt="test",
+            question="test",
             answer="test",
             raw_question="test",
             task_id="",  # Empty task_id should raise error
@@ -76,7 +76,7 @@ def test_humaneval_parse_entry_validation():
 
     with pytest.raises(ValueError, match="Entry point cannot be empty"):
         HumanEvalParseEntry.create(
-            prompt="test",
+            question="test",
             answer="test",
             raw_question="test",
             task_id="test",
@@ -93,9 +93,7 @@ def test_process_entry(parser, sample_entry):
     assert isinstance(result, HumanEvalParseEntry)
     assert result.task_id == "HumanEval/0"
     assert result.entry_point == "add"
-    assert (
-        result.prompt == f"{parser._default_system_prompt}\n\n{sample_entry['prompt']}"
-    )
+
     assert result.answer == sample_entry["canonical_solution"]
     assert result.test == sample_entry["test"]
     assert result.task_name == "openai_humaneval"
@@ -147,10 +145,7 @@ def test_plus_process_entry(plus_parser, plus_sample_entry):
     assert isinstance(result, HumanEvalParseEntry)
     assert result.task_id == "HumanEval/0"
     assert result.entry_point == "add"
-    assert (
-        result.prompt
-        == f"{plus_parser._default_system_prompt}\n\n{plus_sample_entry['prompt']}"
-    )
+
     assert result.answer == plus_sample_entry["canonical_solution"]
     assert result.test == plus_sample_entry["test"]
     assert result.task_name == "default"
@@ -191,7 +186,7 @@ def test_get_dataset_description(parser, plus_parser):
     assert "evalplus" in plus_description.citation
 
 
-def test_get_evaluation_metrics(parser, plus_parser):
+def test_get_evaluation_metrics(parser):
     """Test evaluation metrics generation for both parsers."""
     # Test original HumanEval metrics
     metrics = parser.get_evaluation_metrics()
tests/test_ifeval_parser.py CHANGED
@@ -31,7 +31,7 @@ def ifeval_parser():
  def test_ifeval_parse_entry_creation_valid():
  """Test valid creation of IFEvalParseEntry."""
  entry = IFEvalParseEntry.create(
- prompt="Test system prompt\n\nTest instruction",
+ question="Test instruction",
  answer="", # IFEval doesn't have answers
  raw_question="Test instruction",
  raw_answer="",
@@ -42,7 +42,7 @@ def test_ifeval_parse_entry_creation_valid():
  )

  assert isinstance(entry, IFEvalParseEntry)
- assert entry.prompt == "Test system prompt\n\nTest instruction"
+ assert entry.question == "Test instruction"
  assert entry.answer == ""
  assert entry.key == 1
  assert entry.instruction_id_list == ["test_001", "test_002"]
tests/test_math_parser.py CHANGED
@@ -44,7 +44,7 @@ def sample_math_entries():
  def test_math_parse_entry_creation_valid():
  """Test valid creation of MATHParseEntry with all fields."""
  entry = MATHParseEntry.create(
- prompt="Test prompt",
+ question="Test question",
  answer="Test answer",
  raw_question="Test question",
  raw_answer="Test solution",
@@ -54,7 +54,7 @@ def test_math_parse_entry_creation_valid():
  )

  assert isinstance(entry, MATHParseEntry)
- assert entry.prompt == "Test prompt"
+ assert entry.question == "Test question"
  assert entry.answer == "Test answer"
  assert entry.raw_question == "Test question"
  assert entry.raw_answer == "Test solution"
@@ -85,9 +85,7 @@ def test_process_entry(math_parser, test_case):
  entry = math_parser.process_entry(test_case, task_name=test_case["type"])

  assert isinstance(entry, MATHParseEntry)
- assert (
- entry.prompt == f"{math_parser._default_system_prompt}\n{test_case['problem']}"
- )
+
  assert entry.answer == test_case["solution"]
  assert entry.raw_question == test_case["problem"]
  assert entry.raw_answer == test_case["solution"]
@@ -108,7 +106,6 @@ def test_math_parser_initialization(math_parser):
  math_parser.get_huggingface_link
  == "https://huggingface.co/datasets/lighteval/MATH"
  )
- assert "mathematics problem" in math_parser._default_system_prompt.lower()


  def test_get_current_task(math_parser):
tests/test_mbpp_parser.py CHANGED
@@ -23,7 +23,7 @@ def parser():
  def test_mbpp_parse_entry_creation():
  """Test creation of MBPPParseEntry"""
  entry = MBPPParseEntry.create(
- prompt="test prompt",
+ question="test question",
  answer="test answer",
  raw_question="raw question",
  task_id=42,
@@ -34,7 +34,7 @@ def test_mbpp_parse_entry_creation():
  source_file="test.pdf",
  )

- assert entry.prompt == "test prompt"
+ assert entry.question == "test question"
  assert entry.answer == "test answer"
  assert entry.raw_question == "raw question"
  assert entry.raw_answer == "test answer"
@@ -49,7 +49,7 @@ def test_mbpp_parse_entry_validation():
  """Test validation of required fields"""
  with pytest.raises(ValueError, match="Task ID must be an integer"):
  MBPPParseEntry.create(
- prompt="test",
+ question="test",
  answer="test",
  raw_question="test",
  task_id="not_an_int", # Invalid task_id type
@@ -71,8 +71,6 @@ def test_process_entry(parser, sample_entry):
  assert result.answer == sample_entry["code"]
  assert result.test_list == sample_entry["test_list"]
  assert result.challenge_test_list == sample_entry["challenge_test_list"]
- expected_prompt = f"{parser._system_prompt}\n\nTask: {sample_entry['text']}"
- assert result.prompt == expected_prompt
  assert result.task_name == "full"


@@ -142,18 +140,6 @@ def test_full_workflow_with_different_splits(parser):
  assert all(entry.task_name == "full" for entry in train_data)


- def test_custom_system_prompt():
- """Test parser initialization with custom system prompt"""
- custom_prompt = "Custom system prompt"
- parser = MBPPDatasetParser(system_prompt=custom_prompt)
- assert parser._system_prompt == custom_prompt
-
-
- def test_default_system_prompt(parser):
- """Test parser uses default system prompt when none provided"""
- assert parser._system_prompt == parser._default_system_prompt
-
-
  def test_get_dataset_description(parser):
  """Test dataset description generation."""
  description = parser.get_dataset_description()
tests/test_mgsm_parser.py CHANGED
@@ -47,7 +47,7 @@ def sample_mgsm_entries():
  def test_mgsm_parse_entry_creation_valid():
  """Test valid creation of MGSMParseEntry with all fields."""
  entry = MGSMParseEntry.create(
- prompt="Test prompt",
+ question="Test question",
  answer="Test answer",
  raw_question="Test question",
  raw_answer="Test answer",
@@ -58,7 +58,7 @@ def test_mgsm_parse_entry_creation_valid():
  )

  assert isinstance(entry, MGSMParseEntry)
- assert entry.prompt == "Test prompt"
+ assert entry.question == "Test question"
  assert entry.answer == "Test answer"
  assert entry.raw_question == "Test question"
  assert entry.raw_answer == "Test answer"
@@ -168,22 +168,6 @@ def test_supported_languages(mgsm_parser, language):
  assert entry.numerical_answer == 42


- def test_system_prompt_override(mgsm_parser):
- """Test overriding the default system prompt."""
- custom_prompt = "Custom system prompt for testing"
- parser = MGSMDatasetParser(system_prompt=custom_prompt)
-
- test_entry = {
- "question": "Test question",
- "answer": "Test answer",
- "answer_number": 42,
- "equation_solution": "42",
- }
-
- entry = parser.process_entry(test_entry, task_name="en")
- assert custom_prompt in entry.prompt
-
-
  def test_get_dataset_description(mgsm_parser):
  """Test dataset description generation."""
  description = mgsm_parser.get_dataset_description()
tests/test_mmlu_parser.py CHANGED
@@ -70,7 +70,7 @@ def sample_mmlu_pro_entries():
  def test_mmlu_parse_entry_creation_valid():
  """Test valid creation of MMLUParseEntry."""
  entry = MMLUParseEntry.create(
- prompt="Test prompt",
+ question="Test question",
  answer="A",
  raw_question="Test question",
  raw_choices=["choice1", "choice2", "choice3", "choice4"],
@@ -78,7 +78,7 @@ def test_mmlu_parse_entry_creation_valid():
  task_name="test_task",
  )
  assert isinstance(entry, MMLUParseEntry)
- assert entry.prompt == "Test prompt"
+ assert entry.question == "Test question"
  assert entry.answer == "A"
  assert entry.raw_choices == ["choice1", "choice2", "choice3", "choice4"]
  assert entry.task_name == "test_task"
@@ -91,7 +91,7 @@ def test_mmlu_parse_entry_creation_invalid(invalid_answer):
  ValueError, match="Invalid answer_letter.*must be one of A, B, C, D"
  ):
  MMLUParseEntry.create(
- prompt="Test prompt",
+ question="Test question",
  answer=invalid_answer,
  raw_question="Test question",
  raw_choices=["choice1", "choice2", "choice3", "choice4"],
@@ -106,10 +106,10 @@ def test_process_entry_base(base_parser, sample_mmlu_entries):

  assert isinstance(entry, MMLUParseEntry)
  assert entry.answer == "B" # Index 1 maps to B
- assert "A. London" in entry.prompt
- assert "B. Paris" in entry.prompt
- assert "C. Berlin" in entry.prompt
- assert "D. Madrid" in entry.prompt
+ assert "A. London" in entry.question
+ assert "B. Paris" in entry.question
+ assert "C. Berlin" in entry.question
+ assert "D. Madrid" in entry.question
  assert entry.raw_question == "What is the capital of France?"
  assert entry.raw_choices == ["London", "Paris", "Berlin", "Madrid"]
  assert entry.raw_answer == "1"
@@ -119,7 +119,7 @@ def test_process_entry_base(base_parser, sample_mmlu_entries):
  def test_mmlu_pro_parse_entry_creation_valid():
  """Test valid creation of MMLUProParseEntry."""
  entry = MMLUProParseEntry.create(
- prompt="Test prompt",
+ question="Test question",
  answer="E", # MMLU Pro supports up to J
  raw_question="Test question",
  raw_choices=["choice1", "choice2", "choice3", "choice4", "choice5"],
@@ -139,7 +139,7 @@ def test_process_entry_mmlu_pro(mmlu_pro_parser, sample_mmlu_pro_entries):

  assert isinstance(entry, MMLUProParseEntry)
  assert entry.answer == "B" # Index 1 maps to B
- assert "O(n log n)" in entry.prompt
+ assert "O(n log n)" in entry.question
  assert entry.task_name == "computer_science"
  assert len(entry.raw_choices) == 6

tests/test_tmlu_parser.py CHANGED
@@ -47,7 +47,7 @@ def sample_tmlu_entries():
  def test_tmlu_parse_entry_creation_valid():
  """Test valid creation of TMLUParseEntry."""
  entry = TMLUParseEntry.create(
- prompt="Test prompt",
+ question="Test question",
  answer="A",
  raw_question="Test question",
  raw_choices=["choice1", "choice2", "choice3", "choice4"],
@@ -57,7 +57,7 @@ def test_tmlu_parse_entry_creation_valid():
  metadata={"source": "test"},
  )
  assert isinstance(entry, TMLUParseEntry)
- assert entry.prompt == "Test prompt"
+ assert entry.question == "Test question"
  assert entry.answer == "A"
  assert entry.raw_choices == ["choice1", "choice2", "choice3", "choice4"]
  assert entry.explanation == "Test explanation"
@@ -71,7 +71,7 @@ def test_tmlu_parse_entry_creation_invalid(invalid_answer):
  ValueError, match="Invalid answer_letter.*must be one of A, B, C, D"
  ):
  TMLUParseEntry.create(
- prompt="Test prompt",
+ question="Test question",
  answer=invalid_answer,
  raw_question="Test question",
  raw_choices=["choice1", "choice2", "choice3", "choice4"],
@@ -140,26 +140,6 @@ def test_different_tasks_parsing(tmlu_parser):
  assert math_count > 0


- def test_system_prompt_override(tmlu_parser):
- """Test overriding the default system prompt."""
- custom_prompt = "Custom system prompt for testing"
- parser = TMLUDatasetParser(system_prompt=custom_prompt)
-
- test_entry = {
- "question": "Test question",
- "A": "Choice A",
- "B": "Choice B",
- "C": "Choice C",
- "D": "Choice D",
- "answer": "A",
- "explanation": "Test explanation",
- "metadata": {"source": "test"},
- }
-
- entry = parser.process_entry(test_entry)
- assert custom_prompt in entry.prompt
-
-
  def test_metadata_handling(tmlu_parser, sample_tmlu_entries):
  """Test proper handling of metadata in entries."""
  entry = tmlu_parser.process_entry(sample_tmlu_entries[0])
tests/test_tw_legal_parser.py CHANGED
@@ -35,7 +35,7 @@ def sample_tw_legal_entries():
  def test_tw_legal_parse_entry_creation_valid():
  """Test valid creation of TWLegalParseEntry."""
  entry = TWLegalParseEntry.create(
- prompt="Test prompt",
+ question="Test question",
  answer="A",
  raw_question="Test question",
  raw_choices=["choice1", "choice2", "choice3", "choice4"],
@@ -43,7 +43,7 @@ def test_tw_legal_parse_entry_creation_valid():
  task_name="default",
  )
  assert isinstance(entry, TWLegalParseEntry)
- assert entry.prompt == "Test prompt"
+ assert entry.question == "Test question"
  assert entry.answer == "A"
  assert entry.raw_choices == ["choice1", "choice2", "choice3", "choice4"]

@@ -55,7 +55,7 @@ def test_tw_legal_parse_entry_creation_invalid(invalid_answer):
  ValueError, match="Invalid answer_letter.*must be one of A, B, C, D"
  ):
  TWLegalParseEntry.create(
- prompt="Test prompt",
+ question="Test question",
  answer=invalid_answer,
  raw_question="Test question",
  raw_choices=["choice1", "choice2", "choice3", "choice4"],
@@ -70,10 +70,10 @@ def test_process_entry(tw_legal_parser, sample_tw_legal_entries):

  assert isinstance(entry, TWLegalParseEntry)
  assert entry.answer == "D"
- assert "A. 法人於法令限制內,有享受權利負擔義務之能力" in entry.prompt
- assert "B. 法人因目的之達到而消滅" in entry.prompt
- assert "C. 法人非依法律之規定,不得成立" in entry.prompt
- assert "D. 法人於登記前,即取得權利能力" in entry.prompt
+ assert "A. 法人於法令限制內,有享受權利負擔義務之能力" in entry.question
+ assert "B. 法人因目的之達到而消滅" in entry.question
+ assert "C. 法人非依法律之規定,不得成立" in entry.question
+ assert "D. 法人於登記前,即取得權利能力" in entry.question
  assert entry.raw_question == "依民法規定,下列關於法人之敘述,何者錯誤?"
  assert len(entry.raw_choices) == 4

@@ -122,24 +122,6 @@ def test_data_parsing(tw_legal_parser):
  assert all(entry.answer in {"A", "B", "C", "D"} for entry in parsed_data)


- def test_system_prompt_override(tw_legal_parser):
- """Test overriding the default system prompt."""
- custom_prompt = "Custom system prompt for testing"
- parser = TWLegalDatasetParser(system_prompt=custom_prompt)
-
- test_entry = {
- "question": "Test question",
- "A": "Choice A",
- "B": "Choice B",
- "C": "Choice C",
- "D": "Choice D",
- "answer": "A",
- }
-
- entry = parser.process_entry(test_entry)
- assert custom_prompt in entry.prompt
-
-
  def test_get_dataset_description(tw_legal_parser):
  """Test getting dataset description for Taiwan Legal parser."""
  description = tw_legal_parser.get_dataset_description()