JeffYang52415 committed
Commit 0450c4e
1 Parent(s): a6c5f53

refactor: remove system prompt

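For reviewers, a minimal usage sketch of the interface after this change (illustrative only, not part of the commit; it assumes the dataset has already been loaded the same way as in the `__main__` examples further down): parsers no longer accept a `system_prompt` argument, and parsed entries expose `question` instead of `prompt`.

```python
from llmdataparser.gsm8k_parser import GSM8KDatasetParser

parser = GSM8KDatasetParser()      # __init__ no longer takes system_prompt
# ... load the "main" split here, as before (loading call omitted in this sketch) ...
parser.parse()

entry = parser.get_parsed_data[0]  # property access, as in the __main__ blocks below
print(entry.question)              # plain question text; replaces the old entry.prompt
print(entry.answer)
```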
.gitignore CHANGED
@@ -38,3 +38,7 @@ dist/
 #notebook cache
 .ipynb_checkpoints/
 notebooks/
+
+#coverage
+.coverage
+.coverage.*
llmdataparser/base_parser.py CHANGED
@@ -25,7 +25,7 @@ VALID_CATEGORIES = {
 class ParseEntry:
     """A simple base class for entries, customizable by each dataset parser."""
 
-    prompt: str
+    question: str
     answer: str
     raw_question: str
     raw_answer: str
@@ -166,18 +166,14 @@ class HuggingFaceDatasetParser(DatasetParser[T]):
     _task_names: ClassVar[list[str]]
     # _default_task is the default task to use if no task is specified, e.g. "algebra"
     _default_task: ClassVar[str]
-    # _default_system_prompt is the default system prompt to use if no system prompt is specified
-    _default_system_prompt: ClassVar[str]
     # _hidden_task_names is the list of task names that are hidden in the dataset, e.g. ["math", "physics", "chemistry"]
     _hidden_task_names: ClassVar[list[str]] = []
 
-    def __init__(self, system_prompt: str | None = None, **kwargs: Any) -> None:
+    def __init__(self, **kwargs: Any) -> None:
         """
         Initialize a HuggingFaceDatasetParser.
 
         Args:
-            system_prompt: Optional custom system prompt to use instead of the default.
-                If not provided, will use the class's _default_system_prompt.
             **kwargs: Additional keyword arguments passed to the parent class.
         """
         super().__init__()
@@ -187,8 +183,6 @@ class HuggingFaceDatasetParser(DatasetParser[T]):
         self.split_names: list[str] = []
         # _current_task is the task currently being processed, e.g. "algebra"
         self._current_task: str = ""
-        # _system_prompt is the system prompt currently being used
-        self._system_prompt: str = system_prompt or self._default_system_prompt
 
     def _get_current_task(self, data_entry: dict[str, Any] | None = None) -> str:
         """
llmdataparser/bbh_parser.py CHANGED
@@ -7,7 +7,6 @@ from llmdataparser.base_parser import (
     HuggingFaceDatasetParser,
     HuggingFaceParseEntry,
 )
-from llmdataparser.prompts import BBH_SYSTEM_PROMPT  # You'll need to create this
 
 
 @dataclass(frozen=True, kw_only=True, slots=True)
@@ -17,14 +16,14 @@ class BBHParseEntry(HuggingFaceParseEntry):
     @classmethod
     def create(
         cls,
-        prompt: str,
+        question: str,
         answer: str,
         raw_question: str,
         raw_answer: str,
         task_name: str,
     ) -> "BBHParseEntry":
         return cls(
-            prompt=prompt,
+            question=question,
             answer=answer,
             raw_question=raw_question,
             raw_answer=raw_answer,
@@ -66,7 +65,6 @@ class BBHDatasetParser(HuggingFaceDatasetParser[BBHParseEntry]):
         "word_sorting",
     ]
     _default_task: ClassVar[str] = "reasoning_about_colored_objects"
-    _default_system_prompt: ClassVar[str] = BBH_SYSTEM_PROMPT
 
     def process_entry(
         self, row: dict[str, Any], task_name: str | None = None, **kwargs: Any
@@ -78,14 +76,13 @@ class BBHDatasetParser(HuggingFaceDatasetParser[BBHParseEntry]):
         # Remove parentheses from the answer
         clean_answer = raw_answer.strip("()")
 
-        # Combine system prompt with the question
-        prompt = f"{self._system_prompt}\n\n{raw_question}"
+        question = str(raw_question)
 
         # Use task_name if provided, otherwise use default
         task = task_name or self._get_current_task(row)
 
         return BBHParseEntry.create(
-            prompt=prompt,
+            question=question,
             answer=clean_answer,
             raw_question=raw_question,
             raw_answer=raw_answer,
@@ -176,5 +173,5 @@ if __name__ == "__main__":
     example = parsed_data[0]
     print("\nExample parsed entry:")
     print(f"Task: {example.task_name}")
-    print(f"Question: {example.raw_question}")
+    print(f"Question: {example.question}")
     print(f"Answer: {example.answer}")
llmdataparser/gsm8k_parser.py CHANGED
@@ -7,7 +7,6 @@ from llmdataparser.base_parser import (
     HuggingFaceDatasetParser,
     HuggingFaceParseEntry,
 )
-from llmdataparser.prompts import GSM8K_SYSTEM_PROMPT
 
 
 @dataclass(frozen=True, kw_only=True, slots=True)
@@ -21,7 +20,7 @@ class GSM8KParseEntry(HuggingFaceParseEntry):
     @classmethod
     def create(
         cls,
-        prompt: str,
+        question: str,
         answer: str,
         raw_question: str,
         raw_answer: str,
@@ -30,7 +29,7 @@ class GSM8KParseEntry(HuggingFaceParseEntry):
         task_name: str,
     ) -> "GSM8KParseEntry":
         return cls(
-            prompt=prompt,
+            question=question,
             answer=answer,
             raw_question=raw_question,
             raw_answer=raw_answer,
@@ -46,7 +45,6 @@ class GSM8KDatasetParser(HuggingFaceDatasetParser[GSM8KParseEntry]):
     _data_source: ClassVar[str] = "openai/gsm8k"
     _task_names: ClassVar[list[str]] = ["main", "socratic"]
     _default_task: ClassVar[str] = "main"
-    _default_system_prompt: ClassVar[str] = GSM8K_SYSTEM_PROMPT
 
     def process_entry(
         self, row: dict[str, Any], task_name: str | None = None, **kwargs: Any
@@ -69,10 +67,10 @@ class GSM8KDatasetParser(HuggingFaceDatasetParser[GSM8KParseEntry]):
         # Extract solution (everything before '####')
         solution = raw_answer.split("####")[0].strip()
 
-        prompt = f"{self._system_prompt}\n{raw_question}"
+        question = str(raw_question)
 
         return GSM8KParseEntry.create(
-            prompt=prompt,
+            question=question,
             answer=str(numerical_answer),
             raw_question=raw_question,
             raw_answer=raw_answer,
@@ -145,7 +143,7 @@ if __name__ == "__main__":
     parser.parse()
 
     parsed_data = parser.get_parsed_data
-    pprint(parsed_data[0].prompt)
+    pprint(parsed_data[0].question)
    pprint(parsed_data[0].answer)
    pprint(parsed_data[0].raw_question)
    pprint(parsed_data[0].raw_answer)
llmdataparser/humaneval_parser.py CHANGED
@@ -7,7 +7,6 @@ from llmdataparser.base_parser import (
     HuggingFaceDatasetParser,
     HuggingFaceParseEntry,
 )
-from llmdataparser.prompts import HUMANEVAL_SYSTEM_PROMPT
 
 
 @dataclass(frozen=True, kw_only=True, slots=True)
@@ -22,7 +21,7 @@ class HumanEvalParseEntry(HuggingFaceParseEntry):
     @classmethod
     def create(
         cls,
-        prompt: str,
+        question: str,
         answer: str,
         raw_question: str,
         task_id: str,
@@ -35,7 +34,7 @@ class HumanEvalParseEntry(HuggingFaceParseEntry):
         if not entry_point:
             raise ValueError("Entry point cannot be empty")
         return cls(
-            prompt=prompt,
+            question=question,
             answer=answer,
             raw_question=raw_question,
             raw_answer=answer,  # In HumanEval, the canonical solution is the raw answer
@@ -52,7 +51,6 @@ class HumanEvalDatasetParser(HuggingFaceDatasetParser[HumanEvalParseEntry]):
     _data_source: ClassVar[str] = "openai/openai_humaneval"
     _default_task: ClassVar[str] = "openai_humaneval"
     _task_names: ClassVar[list[str]] = ["openai_humaneval"]
-    _default_system_prompt: ClassVar[str] = HUMANEVAL_SYSTEM_PROMPT
 
     def process_entry(
         self, row: dict[str, Any], task_name: str | None = None, **kwargs: Any
@@ -64,14 +62,13 @@ class HumanEvalDatasetParser(HuggingFaceDatasetParser[HumanEvalParseEntry]):
         entry_point = row["entry_point"]
         test = row["test"]
 
-        # Combine system prompt with the function signature and docstring
-        prompt = f"{self._system_prompt}\n\n{raw_question}"
+        question = str(raw_question)
 
         # Use task_name if provided, otherwise use default
         task = task_name or self._get_current_task(row)
 
         return HumanEvalParseEntry.create(
-            prompt=prompt,
+            question=question,
             answer=answer,
             raw_question=raw_question,
             task_id=task_id,
@@ -151,7 +148,6 @@ class HumanEvalDatasetPlusParser(HumanEvalDatasetParser):
     _data_source: ClassVar[str] = "evalplus/humanevalplus"
     _default_task: ClassVar[str] = "default"
     _task_names: ClassVar[list[str]] = ["default"]
-    _default_system_prompt: ClassVar[str] = HUMANEVAL_SYSTEM_PROMPT
 
     def process_entry(
         self, row: dict[str, Any], task_name: str | None = None, **kwargs: Any
@@ -163,14 +159,12 @@ class HumanEvalDatasetPlusParser(HumanEvalDatasetParser):
         entry_point = row["entry_point"]
         test = row["test"]
 
-        # Combine system prompt with the function signature and docstring
-        prompt = f"{self._system_prompt}\n\n{raw_question}"
-
+        question = str(raw_question)
         # Use task_name if provided, otherwise use default
         task = task_name or self._get_current_task(row)
 
         return HumanEvalParseEntry.create(
-            prompt=prompt,
+            question=question,
             answer=answer,
             raw_question=raw_question,
             task_id=task_id,
@@ -264,7 +258,7 @@ if __name__ == "__main__":
     print("\nExample parsed entry:")
     print(f"Task ID: {example.task_id}")
     print(f"Entry Point: {example.entry_point}")
-    print(f"Prompt:\n{example.prompt}")
+    print(f"Question:\n{example.question}")
     print(f"Solution:\n{example.answer}")
 
     parser = HumanEvalDatasetPlusParser()
llmdataparser/ifeval_parser.py CHANGED
@@ -7,7 +7,6 @@ from llmdataparser.base_parser import (
     HuggingFaceDatasetParser,
     HuggingFaceParseEntry,
 )
-from llmdataparser.prompts import IFEVAL_SYSTEM_PROMPT  # You'll need to create this
 
 
 @dataclass(frozen=True, kw_only=True, slots=True)
@@ -21,7 +20,7 @@ class IFEvalParseEntry(HuggingFaceParseEntry):
     @classmethod
     def create(
         cls,
-        prompt: str,
+        question: str,
         answer: str,
         raw_question: str,
         raw_answer: str,
@@ -31,7 +30,7 @@ class IFEvalParseEntry(HuggingFaceParseEntry):
         task_name: str,
     ) -> "IFEvalParseEntry":
         return cls(
-            prompt=prompt,
+            question=question,
             answer=answer,
             raw_question=raw_question,
             raw_answer=raw_answer,
@@ -48,7 +47,6 @@ class IFEvalDatasetParser(HuggingFaceDatasetParser[IFEvalParseEntry]):
     _data_source: ClassVar[str] = "google/IFEval"
     _default_task: ClassVar[str] = "default"
     _task_names: ClassVar[list[str]] = ["default"]
-    _default_system_prompt: ClassVar[str] = IFEVAL_SYSTEM_PROMPT
 
     def process_entry(
         self, row: dict[str, Any], task_name: str | None = None, **kwargs: Any
@@ -65,14 +63,13 @@ class IFEvalDatasetParser(HuggingFaceDatasetParser[IFEvalParseEntry]):
         answer = ""
         raw_answer = ""
 
-        # Combine system prompt with the instruction prompt
-        prompt = f"{self._system_prompt}\n\n{raw_question}"
+        question = str(raw_question)
 
         # Use task_name if provided, otherwise use default
         task = task_name or self._get_current_task(row)
 
         return IFEvalParseEntry.create(
-            prompt=prompt,
+            question=question,
             answer=answer,
             raw_question=raw_question,
             raw_answer=raw_answer,
@@ -162,6 +159,6 @@ if __name__ == "__main__":
     example = parsed_data[0]
     print("\nExample parsed entry:")
     print(f"Key: {example.key}")
-    print(f"Prompt: {example.prompt}")
+    print(f"Question: {example.question}")
     print(f"Instruction IDs: {example.instruction_id_list}")
     print(f"kwargs: {example.kwargs}")
llmdataparser/math_parser.py CHANGED
@@ -20,7 +20,7 @@ class MATHParseEntry(HuggingFaceParseEntry):
     @classmethod
     def create(
         cls,
-        prompt: str,
+        question: str,
         answer: str,
         raw_question: str,
         raw_answer: str,
@@ -29,7 +29,7 @@ class MATHParseEntry(HuggingFaceParseEntry):
         solution: str,
     ) -> "MATHParseEntry":
         return cls(
-            prompt=prompt,
+            question=question,
             answer=answer,
             raw_question=raw_question,
             raw_answer=raw_answer,
@@ -54,9 +54,7 @@ class MATHDatasetParser(HuggingFaceDatasetParser[MATHParseEntry]):
         "all",
     ]
     _default_task: ClassVar[str] = "all"
-    _default_system_prompt: ClassVar[str] = (
-        "Solve the following mathematics problem step by step:"
-    )
+
     _valid_levels: ClassVar[set[str]] = {
         f"Level {i}" for i in range(1, 6)
     }  # Levels 1-5 are valid
@@ -80,7 +78,7 @@ class MATHDatasetParser(HuggingFaceDatasetParser[MATHParseEntry]):
             level = "Unknown"
 
         return MATHParseEntry.create(
-            prompt=f"{self._system_prompt}\n{row['problem']}",
+            question=str(row["problem"]),
            answer=row["solution"],
            raw_question=row["problem"],
            raw_answer=row["solution"],
@@ -187,5 +185,5 @@ if __name__ == "__main__":
     print("\nExample parsed entry:")
     print(f"Task: {example.task_name}")
     print(f"Level: {example.level}")
-    print(f"Question: {example.raw_question}")
+    print(f"Question: {example.question}")
     print(f"Solution: {example.solution}")
llmdataparser/mbpp_parser.py CHANGED
@@ -7,7 +7,6 @@ from llmdataparser.base_parser import (
     HuggingFaceDatasetParser,
     HuggingFaceParseEntry,
 )
-from llmdataparser.prompts import MBPP_SYSTEM_PROMPT
 
 
 @dataclass(frozen=True, kw_only=True, slots=True)
@@ -23,7 +22,7 @@ class MBPPParseEntry(HuggingFaceParseEntry):
     @classmethod
     def create(
         cls,
-        prompt: str,
+        question: str,
         answer: str,
         raw_question: str,
         task_id: int,
@@ -37,7 +36,7 @@ class MBPPParseEntry(HuggingFaceParseEntry):
             raise ValueError("Task ID must be an integer")
 
         return cls(
-            prompt=prompt,
+            question=question,
             answer=answer,
             raw_question=raw_question,
             raw_answer=answer,  # In MBPP, the code solution is the raw answer
@@ -56,7 +55,6 @@ class MBPPDatasetParser(HuggingFaceDatasetParser[MBPPParseEntry]):
     _data_source: ClassVar[str] = "google-research-datasets/mbpp"
     _default_task: ClassVar[str] = "full"  # Can be 'full' or 'sanitized'
     _task_names: ClassVar[list[str]] = ["full", "sanitized"]
-    _default_system_prompt: ClassVar[str] = MBPP_SYSTEM_PROMPT
 
     def process_entry(
         self, row: dict[str, Any], task_name: str | None = None, **kwargs: Any
@@ -69,15 +67,14 @@ class MBPPDatasetParser(HuggingFaceDatasetParser[MBPPParseEntry]):
         test_setup_code = row.get("test_setup_code", "")
         challenge_test_list = row.get("challenge_test_list", [])
 
-        # Combine system prompt with the task description
-        prompt = f"{self._system_prompt}\n\nTask: {raw_question}"
+        question = str(raw_question)
 
         # Use task_name if provided, otherwise use default
         task = task_name or self._get_current_task(row)
         source_file = row.get("source_file", "")
 
         return MBPPParseEntry.create(
-            prompt=prompt,
+            question=question,
             answer=answer,
             raw_question=raw_question,
             task_id=task_id,
llmdataparser/mgsm_parser.py CHANGED
@@ -7,7 +7,6 @@ from llmdataparser.base_parser import (
     HuggingFaceDatasetParser,
     HuggingFaceParseEntry,
 )
-from llmdataparser.prompts import MGSM_SYSTEM_PROMPT
 
 
 @dataclass(frozen=True, kw_only=True, slots=True)
@@ -21,7 +20,7 @@ class MGSMParseEntry(HuggingFaceParseEntry):
     @classmethod
     def create(
         cls,
-        prompt: str,
+        question: str,
         answer: str,
         raw_question: str,
         raw_answer: str,
@@ -31,7 +30,7 @@ class MGSMParseEntry(HuggingFaceParseEntry):
         language: str,
     ) -> "MGSMParseEntry":
         return cls(
-            prompt=prompt,
+            question=question,
             answer=answer,
             raw_question=raw_question,
             raw_answer=raw_answer,
@@ -60,7 +59,6 @@ class MGSMDatasetParser(HuggingFaceDatasetParser[MGSMParseEntry]):
         "th",
         "zh",
     ]
-    _default_system_prompt: ClassVar[str] = MGSM_SYSTEM_PROMPT
 
     def process_entry(
         self, row: dict[str, Any], task_name: str | None = None, **kwargs: Any
@@ -73,7 +71,7 @@ class MGSMDatasetParser(HuggingFaceDatasetParser[MGSMParseEntry]):
             task_name: Language code for the current task
 
         Returns:
-            MGSMParseEntry: Processed entry with prompt, answer, and metadata
+            MGSMParseEntry: Processed entry with question, answer, and metadata
         """
         task = task_name or self._get_current_task(row)
         raw_question = row["question"]
@@ -81,14 +79,13 @@ class MGSMDatasetParser(HuggingFaceDatasetParser[MGSMParseEntry]):
         numerical_answer = row["answer_number"]
         equation_solution = row["equation_solution"]
 
-        # Construct the prompt with the system prompt and question
-        prompt = f"{self._system_prompt}\n{raw_question}"
+        question = str(raw_question)
 
         # Use numerical answer as string for the answer field if no detailed answer is provided
         answer = raw_answer if raw_answer else str(numerical_answer)
 
         return MGSMParseEntry.create(
-            prompt=prompt,
+            question=question,
             answer=answer,
             raw_question=raw_question,
             raw_answer=raw_answer,
@@ -188,7 +185,7 @@ if __name__ == "__main__":
     parser.parse()
 
     parsed_data = parser.get_parsed_data
-    pprint(parsed_data[0].prompt)
+    pprint(parsed_data[0].question)
    pprint(parsed_data[0].answer)
    pprint(parsed_data[0].raw_question)
    pprint(parsed_data[0].numerical_answer)
llmdataparser/mmlu_parser.py CHANGED
@@ -7,7 +7,6 @@ from llmdataparser.base_parser import (
     HuggingFaceDatasetParser,
     HuggingFaceParseEntry,
 )
-from llmdataparser.prompts import MMLU_PRO_SYSTEM_PROMPT, MMLU_SYSTEM_PROMPT
 
 MMLU_VALID_ANSWERS: Final[set[str]] = {"A", "B", "C", "D"}
 MMLU_PRO_VALID_ANSWERS: Final[set[str]] = {
@@ -36,7 +35,7 @@ class MMLUParseEntry(HuggingFaceParseEntry):
     @classmethod
     def create(
         cls,
-        prompt: str,
+        question: str,
         answer: str,
         raw_question: str,
         raw_choices: list[str],
@@ -50,7 +49,7 @@ class MMLUParseEntry(HuggingFaceParseEntry):
         if not task_name:
             raise ValueError("Task name cannot be empty")
         return cls(
-            prompt=prompt,
+            question=question,
             answer=answer,
             raw_question=raw_question,
             raw_answer=raw_answer,
@@ -69,7 +68,7 @@ class MMLUProParseEntry(HuggingFaceParseEntry):
     @classmethod
     def create(
         cls,
-        prompt: str,
+        question: str,
         answer: str,
         raw_question: str,
         raw_choices: list[str],
@@ -83,7 +82,7 @@ class MMLUProParseEntry(HuggingFaceParseEntry):
         if not task_name:
             raise ValueError("Task name cannot be empty")
         return cls(
-            prompt=prompt,
+            question=question,
             answer=answer,
             raw_question=raw_question,
             raw_choices=raw_choices,
@@ -95,8 +94,6 @@ class MMLUProParseEntry(HuggingFaceParseEntry):
 class MMLUDatasetParser(HuggingFaceDatasetParser[MMLUParseEntry]):
     """Base class for MMLU dataset parsers with common functionality."""
 
-    _default_system_prompt = MMLU_SYSTEM_PROMPT
-
     def _get_task_from_entry(self, data_entry: dict[str, Any]) -> str:
         """Get the task name from the data entry or default task name."""
         task_name: str = data_entry.get("subject", "")
@@ -106,7 +103,7 @@ class MMLUDatasetParser(HuggingFaceDatasetParser[MMLUParseEntry]):
         self, row: dict[str, Any], task_name: str | None = None, **kwargs: Any
     ) -> MMLUParseEntry:
         """
-        Generate a prompt and expected answer from the given row.
+        Generate a question and expected answer from the given row.
 
         Args:
             row: A data point to be formatted.
@@ -127,11 +124,11 @@ class MMLUDatasetParser(HuggingFaceDatasetParser[MMLUParseEntry]):
         raw_choices = row["choices"]
         raw_answer = str(row["answer"])  # Ensure raw_answer is a string
 
-        prompt = f"{self._system_prompt}\nQuestion: {raw_question}\n{choices}\nAnswer:"
+        question = f"Question: {raw_question}\n{choices}\nAnswer:"
         answer_letter = chr(65 + int(raw_answer))  # Convert index to 'A', 'B', 'C', 'D'
 
         return MMLUParseEntry.create(
-            prompt=prompt,
+            question=question,
             answer=answer_letter,
             raw_question=raw_question,
             raw_choices=raw_choices,
@@ -482,11 +479,11 @@ class TMMLUPlusDatasetParser(MMLUDatasetParser):
         raw_question = row["question"]
         raw_answer = row["answer"]
 
-        prompt = f"{self._system_prompt}\nQuestion: {raw_question}\n{choices}\nAnswer:"
+        question = f"Question: {raw_question}\n{choices}\nAnswer:"
         task = task_name or self._get_current_task(row)
 
         return MMLUParseEntry.create(
-            prompt, raw_answer, raw_question, raw_choices, raw_answer, task
+            question, raw_answer, raw_question, raw_choices, raw_answer, task
         )
 
     def get_dataset_description(self) -> DatasetDescription:
@@ -572,7 +569,6 @@ class MMLUProDatasetParser(HuggingFaceDatasetParser[MMLUProParseEntry]):
         "computer_science",
         "history",
     ]
-    _default_system_prompt = MMLU_PRO_SYSTEM_PROMPT
 
     def _get_task_from_entry(self, data_entry: dict[str, Any]) -> str:
         """Get the task name from the data entry or default task name."""
@@ -586,7 +582,7 @@ class MMLUProDatasetParser(HuggingFaceDatasetParser[MMLUProParseEntry]):
         self, row: dict[str, Any], task_name: str | None = None, **kwargs: Any
     ) -> MMLUProParseEntry:
         """
-        Generate a prompt and expected answer from the given row.
+        Generate a question and expected answer from the given row.
 
         Args:
             row (dict[str, Any]): A data point to be formatted with MMLU Pro specific structure
@@ -608,13 +604,13 @@ class MMLUProDatasetParser(HuggingFaceDatasetParser[MMLUProParseEntry]):
         raw_answer = row["answer"]
         answer_index = row["answer_index"]
 
-        prompt = f"{self._system_prompt}\nQuestion: {raw_question}\n{choices}\nAnswer:"
+        question = f"Question: {raw_question}\n{choices}\nAnswer:"
         answer_letter = chr(
             65 + answer_index
         )  # Convert index to 'A', 'B', 'C', 'D', etc.
 
         return MMLUProParseEntry.create(
-            prompt, answer_letter, raw_question, raw_choices, raw_answer, final_task
+            question, answer_letter, raw_question, raw_choices, raw_answer, final_task
         )
 
     def get_dataset_description(self) -> DatasetDescription:
llmdataparser/prompts.py CHANGED
@@ -3,164 +3,65 @@ from typing import Final
 
 MMLU_SYSTEM_PROMPT: Final[str] = textwrap.dedent(
     """\
-    You are a highly knowledgeable expert tasked with answering multiple-choice questions across various academic and professional fields. Each question has four options (A, B, C, D). Your goal is to select the single most accurate answer based on factual knowledge.
-
-    Instructions:
-    1. Carefully analyze the question and all answer options
-    2. Consider only verified, factual information
-    3. Select the most precise and accurate option
-    4. Respond with ONLY the letter (A, B, C, or D) - no explanations or additional text
+    You are an expert answering multiple-choice questions. Select the single most accurate answer (A, B, C, or D) based on factual knowledge. Respond with the letter only.
     """
 )
 
 MMLU_PRO_SYSTEM_PROMPT: Final[str] = textwrap.dedent(
     """\
-    You are a highly knowledgeable expert tasked with answering multiple-choice questions across various academic and professional fields. Each question has ten options (A through J). Your goal is to select the single most accurate answer based on factual knowledge.
-
-    Instructions:
-    1. Carefully analyze the question and all answer options
-    2. Consider only verified, factual information
-    3. Select the most precise and accurate option
-    4. Respond with ONLY the letter (A through J) - no explanations or additional text
+    You are an expert answering multiple-choice questions. Select the single most accurate answer (A through J) based on factual knowledge. Respond with the letter only.
     """
 )
 
 GSM8K_SYSTEM_PROMPT: Final[str] = textwrap.dedent(
     """\
-    You are an expert mathematics tutor. Your task is to solve math word problems by breaking them down into clear, logical steps.
-
-    Instructions:
-    1. Read the problem carefully
-    2. Show your step-by-step reasoning
-    3. Ensure each step is clear and mathematically sound
-    4. End with the final numerical answer
-    5. Format your response as:
-       Let's solve this step by step:
-       1) [First step]
-       2) [Second step]
-       ...
-       Therefore, the answer is [number]
+    Solve this math problem step by step:
+    1) Show your reasoning
+    2) End with "Therefore, the answer is [number]"
    """
 )
 
-
 HUMANEVAL_SYSTEM_PROMPT: Final[str] = textwrap.dedent(
     """\
-    You are an expert Python programmer tasked with implementing Python functions. Your goal is to write clean, efficient, and correct code that meets the specifications.
-
-    Instructions:
-    1. Read the function signature and docstring carefully
-    2. Implement only the function body, not the signature or docstring
-    3. Follow Python best practices and PEP 8 style guidelines
-    4. Write clear, readable code with appropriate variable names
-    5. Handle edge cases and input validation where necessary
-    6. Use type hints and ensure type safety
-    7. Optimize for both readability and performance
-    8. Add comments for complex logic or non-obvious implementations
-    9. Include appropriate error handling with specific exception types
-    10. Consider writing code that would be easy to test
-    11. Return only the implementation code, no additional text
-
-    Example of good implementation:
-    ```python
-    # Handle edge case of empty input
-    if not numbers:
-        raise ValueError("Input list cannot be empty")
-
-    # Use descriptive variable names and type hints
-    result: list[int] = sorted(numbers)
-    return result[len(result) // 2]  # Return median value
-    ```
+    Implement the Python function following best practices. Include error handling, type hints, and comments for complex logic. Return only the implementation code.
     """
 )
 
-MGSM_SYSTEM_PROMPT = textwrap.dedent(
+MGSM_SYSTEM_PROMPT: Final[str] = textwrap.dedent(
     """\
-    You are an expert mathematics tutor who can explain solutions in multiple languages. Your task is to solve math word problems by breaking them down into clear, logical steps.
-
-    Instructions:
-    1. Read the problem carefully
-    2. Show your step-by-step reasoning
-    3. Ensure each step is clear and mathematically sound
-    4. Use appropriate number formatting for the target language (e.g., decimal points vs. commas)
-    5. End with the final numerical answer
-    6. Format your response as:
-       Let's solve this step by step:
-       1) [First step]
-       2) [Second step]
-       ...
-       Therefore, the answer is [number]
+    Solve this math problem step by step in the specified language:
+    1) Show your reasoning
+    2) Use appropriate number formatting
+    3) End with "Therefore, the answer is [number]"
     """
 )
 
-
 IFEVAL_SYSTEM_PROMPT: Final[str] = textwrap.dedent(
     """\
-    You are a precise instruction follower. Your task is to generate responses that exactly match given requirements and constraints.
-
-    Instructions:
-    1. Read all requirements carefully
-    2. Follow formatting rules exactly
-    3. Meet all length requirements
-    4. Include all required elements
-    5. Avoid forbidden elements
-    6. Provide ONLY the requested output
+    Follow the given requirements exactly. Provide only the requested output.
     """
 )
 
 BBH_SYSTEM_PROMPT: Final[str] = textwrap.dedent(
     """\
-    You are a highly intelligent expert tasked with solving complex reasoning problems. These problems test various cognitive abilities including logical deduction, causal reasoning, mathematical thinking, and spatial understanding.
-
-    Instructions:
-    1. Read the entire problem carefully, including all given conditions and rules
-    2. Pay attention to the specific type of reasoning required (logical, temporal, spatial, etc.)
-    3. Consider all relationships and constraints mentioned in the problem
-    4. Apply structured thinking to reach a valid conclusion
-    5. Choose the answer that logically follows from the given information
-    6. Respond with ONLY the letter (A, B, C, etc.) or "True"/"False" or "Yes"/"No" and so on - no explanations or additional text
+    Solve this reasoning problem and respond with only the answer (letter, True/False, or Yes/No).
     """
 )
 
 MBPP_SYSTEM_PROMPT: Final[str] = textwrap.dedent(
     """\
-    You are an expert Python programmer tasked with solving basic programming problems. Your goal is to write clean, efficient, and well-tested Python code that solves the given task.
-
-    Instructions:
-    1. Read the task description carefully
-    2. Write a complete Python solution that solves the problem
-    3. Follow Python best practices and PEP 8 style guidelines
-    4. Write clear, readable code with descriptive variable names
-    5. Handle edge cases and input validation appropriately
-    6. Include docstrings or comments to explain complex logic
-    7. Focus on fundamental programming concepts and standard library usage
-    8. Optimize for readability and maintainability
-    9. Return only the implementation code, no additional text
+    Write clean, efficient Python code that solves the given task. Include docstrings and handle edge cases. Return only the implementation code.
     """
 )
 
 TW_LEGAL_SYSTEM_PROMPT: Final[str] = textwrap.dedent(
     """\
-    You are an expert lawyer with deep knowledge of Taiwan's legal system. For each question, you will analyze legal scenarios or concepts based on Taiwan's laws and regulations. Your task is to select the most appropriate answer that aligns with Taiwan's legal principles.
-
-    Instructions:
-    1. Carefully analyze the legal question and all options
-    2. Consider Taiwan's specific legal context and terminology
-    3. Apply relevant laws, regulations, and legal principles
-    4. Select the single most accurate answer
-    5. Respond with ONLY the letter (A, B, C, or D) - no explanations or additional text
+    As a Taiwan legal expert, select the most accurate answer (A, B, C, or D) based on Taiwan's laws. Respond with the letter only.
    """
 )
 
 TMLU_SYSTEM_PROMPT: Final[str] = textwrap.dedent(
     """\
-    You are an expert evaluator with deep knowledge of Taiwan's educational system and professional fields. For each question, analyze it carefully and select the most appropriate answer based on your understanding of the subject matter.
-
-    Instructions:
-    1. Carefully read and understand the question
-    2. Consider all answer options thoroughly
-    3. Apply subject-specific knowledge and reasoning
-    4. Select the single most accurate answer
-    5. Respond with ONLY the letter (A, B, C, or D) - no explanations or additional text
+    Select the most accurate answer (A, B, C, or D) based on Taiwan's educational and professional knowledge. Respond with the letter only.
    """
 )
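Since the parsers no longer prepend a system prompt to each question, a caller who still wants one can combine the shortened constants above with `entry.question` directly. A sketch under that assumption (it reuses `entry` from the usage sketch near the top of this diff and mirrors the f-string the parsers used to build):

```python
from llmdataparser.prompts import MMLU_SYSTEM_PROMPT

# For the MMLU-style parsers, entry.question already has the
# "Question: ...\n<choices>\nAnswer:" layout, so only the system
# prompt needs to be prepended by the caller.
full_prompt = f"{MMLU_SYSTEM_PROMPT}\n{entry.question}"
```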
llmdataparser/tmlu_parser.py CHANGED
@@ -7,7 +7,6 @@ from llmdataparser.base_parser import (
     HuggingFaceDatasetParser,
     HuggingFaceParseEntry,
 )
-from llmdataparser.prompts import TMLU_SYSTEM_PROMPT
 
 TMLU_VALID_ANSWERS: Final[set[str]] = {"A", "B", "C", "D"}
 TMLU_VALID_ANSWER_STR: Final[str] = ", ".join(sorted(TMLU_VALID_ANSWERS))
@@ -24,7 +23,7 @@ class TMLUParseEntry(HuggingFaceParseEntry):
     @classmethod
     def create(
         cls,
-        prompt: str,
+        question: str,
         answer: str,
         raw_question: str,
         raw_choices: list[str],
@@ -38,7 +37,7 @@ class TMLUParseEntry(HuggingFaceParseEntry):
                 f"Invalid answer_letter '{answer}'; must be one of {TMLU_VALID_ANSWER_STR}"
             )
         return cls(
-            prompt=prompt,
+            question=question,
             answer=answer,
             raw_question=raw_question,
             raw_answer=raw_answer,
@@ -93,7 +92,6 @@ class TMLUDatasetParser(HuggingFaceDatasetParser[TMLUParseEntry]):
         "teacher_qualification",
         "accountant",
     ]
-    _default_system_prompt = TMLU_SYSTEM_PROMPT
 
     def process_entry(
         self, row: dict[str, Any], task_name: str | None = None, **kwargs: Any
@@ -110,10 +108,10 @@ class TMLUDatasetParser(HuggingFaceDatasetParser[TMLUParseEntry]):
         explanation = row.get("explanation", "")
         metadata = row.get("metadata", {})
 
-        prompt = f"{self._system_prompt}\nQuestion: {raw_question}\n{choices}\nAnswer:"
+        question = f"Question: {raw_question}\n{choices}\nAnswer:"
 
         return TMLUParseEntry.create(
-            prompt=prompt,
+            question=question,
             answer=raw_answer,
             raw_question=raw_question,
             raw_choices=raw_choices,
@@ -187,7 +185,7 @@ if __name__ == "__main__":
     example = parsed_data[0]
     print("\nExample parsed entry:")
     print(f"Task: {example.task_name}")
-    print(f"Question: {example.raw_question}")
+    print(f"Question: {example.question}")
     print("Choices:")
     for i, choice in enumerate(example.raw_choices):
         print(f"{chr(65 + i)}. {choice}")
llmdataparser/tw_legal_parser.py CHANGED
@@ -7,7 +7,6 @@ from llmdataparser.base_parser import (
     HuggingFaceDatasetParser,
     HuggingFaceParseEntry,
 )
-from llmdataparser.prompts import TW_LEGAL_SYSTEM_PROMPT
 
 TW_LEGAL_VALID_ANSWERS: Final[set[str]] = {"A", "B", "C", "D"}
 TW_LEGAL_VALID_ANSWER_STR: Final[str] = ", ".join(sorted(TW_LEGAL_VALID_ANSWERS))
@@ -22,7 +21,7 @@ class TWLegalParseEntry(HuggingFaceParseEntry):
     @classmethod
     def create(
         cls,
-        prompt: str,
+        question: str,
         answer: str,
         raw_question: str,
         raw_choices: list[str],
@@ -34,7 +33,7 @@ class TWLegalParseEntry(HuggingFaceParseEntry):
                 f"Invalid answer_letter '{answer}'; must be one of {TW_LEGAL_VALID_ANSWER_STR}"
             )
         return cls(
-            prompt=prompt,
+            question=question,
             answer=answer,
             raw_question=raw_question,
             raw_answer=raw_answer,
@@ -49,7 +48,6 @@ class TWLegalDatasetParser(HuggingFaceDatasetParser[TWLegalParseEntry]):
     _data_source = "lianghsun/tw-legal-benchmark-v1"
     _default_task = "default"
     _task_names = ["default"]
-    _default_system_prompt = TW_LEGAL_SYSTEM_PROMPT
 
     def process_entry(
         self, row: dict[str, Any], task_name: str | None = None, **kwargs: Any
@@ -64,10 +62,10 @@ class TWLegalDatasetParser(HuggingFaceDatasetParser[TWLegalParseEntry]):
         raw_question = row["question"]
         raw_answer = row["answer"]
 
-        prompt = f"{self._system_prompt}\nQuestion: {raw_question}\n{choices}\nAnswer:"
+        question = f"Question: {raw_question}\n{choices}\nAnswer:"
 
         return TWLegalParseEntry.create(
-            prompt=prompt,
+            question=question,
             answer=raw_answer,
             raw_question=raw_question,
             raw_choices=raw_choices,
@@ -119,7 +117,7 @@ if __name__ == "__main__":
     if parsed_data:
         example = parsed_data[0]
         print("\nExample parsed entry:")
-        print(f"Question: {example.raw_question}")
+        print(f"Question: {example.question}")
         print("Choices:")
         for i, choice in enumerate(example.raw_choices):
             print(f"{chr(65 + i)}. {choice}")
tests/test_bbh_parser.py CHANGED
@@ -28,14 +28,14 @@ def sample_row():
 def test_bbh_parse_entry_creation_valid():
     """Test valid creation of BBHParseEntry."""
     entry = BBHParseEntry.create(
-        prompt="Test prompt",
+        question="Test question",
         answer="A",
         raw_question="Test question",
         raw_answer="(A)",
         task_name="reasoning_about_colored_objects",
     )
     assert isinstance(entry, BBHParseEntry)
-    assert entry.prompt == "Test prompt"
+    assert entry.question == "Test question"
     assert entry.answer == "A"
     assert entry.raw_question == "Test question"
     assert entry.raw_answer == "(A)"
@@ -76,7 +76,6 @@ def test_full_parse_workflow(loaded_bbh_parser):
     assert isinstance(first_entry, BBHParseEntry)
     assert first_entry.task_name == "reasoning_about_colored_objects"
     assert first_entry.answer.strip("()").isalpha()  # Should be a single letter
-    assert first_entry.prompt.startswith(loaded_bbh_parser._system_prompt)
 
 
 def test_process_entry(bbh_parser, sample_row):
@@ -87,9 +86,8 @@ def test_process_entry(bbh_parser, sample_row):
 
     assert isinstance(entry, BBHParseEntry)
     assert entry.answer == "A"  # Stripped from "(A)"
-    assert "What color is the sky" in entry.raw_question
+    assert "What color is the sky" in entry.question
     assert entry.raw_answer == "(A)"
-    assert bbh_parser._system_prompt in entry.prompt
     assert entry.task_name == "reasoning_about_colored_objects"
 
 
tests/test_gsm8k_parser.py CHANGED
@@ -30,7 +30,7 @@ def sample_row():
 def test_gsm8k_parse_entry_creation_valid():
     """Test valid creation of GSM8KParseEntry."""
     entry = GSM8KParseEntry.create(
-        prompt="Test prompt",
+        question="Test question",
         answer="5",
         raw_question="Test question",
         raw_answer="Solution steps #### 5",
@@ -39,7 +39,7 @@ def test_gsm8k_parse_entry_creation_valid():
         numerical_answer=5,
     )
     assert isinstance(entry, GSM8KParseEntry)
-    assert entry.prompt == "Test prompt"
+    assert entry.question == "Test question"
     assert entry.answer == "5"
     assert entry.solution == "Solution steps"
     assert entry.numerical_answer == 5
@@ -83,7 +83,6 @@ def test_full_parse_workflow(loaded_gsm8k_parser):
     assert isinstance(first_entry.numerical_answer, (str, int, float))
     assert "####" in first_entry.raw_answer
     assert first_entry.solution
-    assert first_entry.prompt.startswith(loaded_gsm8k_parser._system_prompt)
 
 
 def test_process_entry(gsm8k_parser, sample_row):
@@ -95,7 +94,6 @@ def test_process_entry(gsm8k_parser, sample_row):
     assert "Janet has 3 apples" in entry.raw_question
     assert "#### 5" in entry.raw_answer
     assert "Let's solve this step by step:" in entry.solution
-    assert gsm8k_parser._system_prompt in entry.prompt
     assert entry.task_name == "main"
 
 
tests/test_humaneval_parser.py CHANGED
@@ -42,7 +42,7 @@ def plus_sample_entry():
 def test_humaneval_parse_entry_creation():
     """Test creation of HumanEvalParseEntry"""
     entry = HumanEvalParseEntry.create(
-        prompt="test prompt",
+        question="test question",
         answer="test answer",
         raw_question="raw question",
         task_id="HumanEval/1",
@@ -51,7 +51,7 @@ def test_humaneval_parse_entry_creation():
         task_name="openai_humaneval",
     )
 
-    assert entry.prompt == "test prompt"
+    assert entry.question == "test question"
     assert entry.answer == "test answer"
     assert entry.raw_question == "raw question"
     assert entry.raw_answer == "test answer"  # Should match answer
@@ -65,7 +65,7 @@ def test_humaneval_parse_entry_validation():
     """Test validation of required fields"""
     with pytest.raises(ValueError, match="Task ID cannot be empty"):
         HumanEvalParseEntry.create(
-            prompt="test",
+            question="test",
             answer="test",
             raw_question="test",
             task_id="",  # Empty task_id should raise error
@@ -76,7 +76,7 @@ def test_humaneval_parse_entry_validation():
 
     with pytest.raises(ValueError, match="Entry point cannot be empty"):
         HumanEvalParseEntry.create(
-            prompt="test",
+            question="test",
             answer="test",
             raw_question="test",
             task_id="test",
@@ -93,9 +93,7 @@ def test_process_entry(parser, sample_entry):
     assert isinstance(result, HumanEvalParseEntry)
     assert result.task_id == "HumanEval/0"
     assert result.entry_point == "add"
-    assert (
-        result.prompt == f"{parser._default_system_prompt}\n\n{sample_entry['prompt']}"
-    )
+
     assert result.answer == sample_entry["canonical_solution"]
     assert result.test == sample_entry["test"]
     assert result.task_name == "openai_humaneval"
@@ -147,10 +145,7 @@ def test_plus_process_entry(plus_parser, plus_sample_entry):
     assert isinstance(result, HumanEvalParseEntry)
     assert result.task_id == "HumanEval/0"
     assert result.entry_point == "add"
-    assert (
-        result.prompt
-        == f"{plus_parser._default_system_prompt}\n\n{plus_sample_entry['prompt']}"
-    )
+
     assert result.answer == plus_sample_entry["canonical_solution"]
     assert result.test == plus_sample_entry["test"]
     assert result.task_name == "default"
@@ -191,7 +186,7 @@ def test_get_dataset_description(parser, plus_parser):
     assert "evalplus" in plus_description.citation
 
 
-def test_get_evaluation_metrics(parser, plus_parser):
+def test_get_evaluation_metrics(parser):
     """Test evaluation metrics generation for both parsers."""
     # Test original HumanEval metrics
     metrics = parser.get_evaluation_metrics()
tests/test_ifeval_parser.py CHANGED
@@ -31,7 +31,7 @@ def ifeval_parser():
  def test_ifeval_parse_entry_creation_valid():
  """Test valid creation of IFEvalParseEntry."""
  entry = IFEvalParseEntry.create(
- prompt="Test system prompt\n\nTest instruction",
+ question="Test instruction",
  answer="", # IFEval doesn't have answers
  raw_question="Test instruction",
  raw_answer="",
@@ -42,7 +42,7 @@ def test_ifeval_parse_entry_creation_valid():
  )

  assert isinstance(entry, IFEvalParseEntry)
- assert entry.prompt == "Test system prompt\n\nTest instruction"
+ assert entry.question == "Test instruction"
  assert entry.answer == ""
  assert entry.key == 1
  assert entry.instruction_id_list == ["test_001", "test_002"]
tests/test_math_parser.py CHANGED
@@ -44,7 +44,7 @@ def sample_math_entries():
  def test_math_parse_entry_creation_valid():
  """Test valid creation of MATHParseEntry with all fields."""
  entry = MATHParseEntry.create(
- prompt="Test prompt",
+ question="Test question",
  answer="Test answer",
  raw_question="Test question",
  raw_answer="Test solution",
@@ -54,7 +54,7 @@ def test_math_parse_entry_creation_valid():
  )

  assert isinstance(entry, MATHParseEntry)
- assert entry.prompt == "Test prompt"
+ assert entry.question == "Test question"
  assert entry.answer == "Test answer"
  assert entry.raw_question == "Test question"
  assert entry.raw_answer == "Test solution"
@@ -85,9 +85,7 @@ def test_process_entry(math_parser, test_case):
  entry = math_parser.process_entry(test_case, task_name=test_case["type"])

  assert isinstance(entry, MATHParseEntry)
- assert (
- entry.prompt == f"{math_parser._default_system_prompt}\n{test_case['problem']}"
- )
+
  assert entry.answer == test_case["solution"]
  assert entry.raw_question == test_case["problem"]
  assert entry.raw_answer == test_case["solution"]
@@ -108,7 +106,6 @@ def test_math_parser_initialization(math_parser):
  math_parser.get_huggingface_link
  == "https://huggingface.co/datasets/lighteval/MATH"
  )
- assert "mathematics problem" in math_parser._default_system_prompt.lower()


  def test_get_current_task(math_parser):
tests/test_mbpp_parser.py CHANGED
@@ -23,7 +23,7 @@ def parser():
  def test_mbpp_parse_entry_creation():
  """Test creation of MBPPParseEntry"""
  entry = MBPPParseEntry.create(
- prompt="test prompt",
+ question="test question",
  answer="test answer",
  raw_question="raw question",
  task_id=42,
@@ -34,7 +34,7 @@ def test_mbpp_parse_entry_creation():
  source_file="test.pdf",
  )

- assert entry.prompt == "test prompt"
+ assert entry.question == "test question"
  assert entry.answer == "test answer"
  assert entry.raw_question == "raw question"
  assert entry.raw_answer == "test answer"
@@ -49,7 +49,7 @@ def test_mbpp_parse_entry_validation():
  """Test validation of required fields"""
  with pytest.raises(ValueError, match="Task ID must be an integer"):
  MBPPParseEntry.create(
- prompt="test",
+ question="test",
  answer="test",
  raw_question="test",
  task_id="not_an_int", # Invalid task_id type
@@ -71,8 +71,6 @@ def test_process_entry(parser, sample_entry):
  assert result.answer == sample_entry["code"]
  assert result.test_list == sample_entry["test_list"]
  assert result.challenge_test_list == sample_entry["challenge_test_list"]
- expected_prompt = f"{parser._system_prompt}\n\nTask: {sample_entry['text']}"
- assert result.prompt == expected_prompt
  assert result.task_name == "full"


@@ -142,18 +140,6 @@ def test_full_workflow_with_different_splits(parser):
  assert all(entry.task_name == "full" for entry in train_data)


- def test_custom_system_prompt():
- """Test parser initialization with custom system prompt"""
- custom_prompt = "Custom system prompt"
- parser = MBPPDatasetParser(system_prompt=custom_prompt)
- assert parser._system_prompt == custom_prompt
-
-
- def test_default_system_prompt(parser):
- """Test parser uses default system prompt when none provided"""
- assert parser._system_prompt == parser._default_system_prompt
-
-
  def test_get_dataset_description(parser):
  """Test dataset description generation."""
  description = parser.get_dataset_description()
tests/test_mgsm_parser.py CHANGED
@@ -47,7 +47,7 @@ def sample_mgsm_entries():
  def test_mgsm_parse_entry_creation_valid():
  """Test valid creation of MGSMParseEntry with all fields."""
  entry = MGSMParseEntry.create(
- prompt="Test prompt",
+ question="Test question",
  answer="Test answer",
  raw_question="Test question",
  raw_answer="Test answer",
@@ -58,7 +58,7 @@ def test_mgsm_parse_entry_creation_valid():
  )

  assert isinstance(entry, MGSMParseEntry)
- assert entry.prompt == "Test prompt"
+ assert entry.question == "Test question"
  assert entry.answer == "Test answer"
  assert entry.raw_question == "Test question"
  assert entry.raw_answer == "Test answer"
@@ -168,22 +168,6 @@ def test_supported_languages(mgsm_parser, language):
  assert entry.numerical_answer == 42


- def test_system_prompt_override(mgsm_parser):
- """Test overriding the default system prompt."""
- custom_prompt = "Custom system prompt for testing"
- parser = MGSMDatasetParser(system_prompt=custom_prompt)
-
- test_entry = {
- "question": "Test question",
- "answer": "Test answer",
- "answer_number": 42,
- "equation_solution": "42",
- }
-
- entry = parser.process_entry(test_entry, task_name="en")
- assert custom_prompt in entry.prompt
-
-
  def test_get_dataset_description(mgsm_parser):
  """Test dataset description generation."""
  description = mgsm_parser.get_dataset_description()
tests/test_mmlu_parser.py CHANGED
@@ -70,7 +70,7 @@ def sample_mmlu_pro_entries():
  def test_mmlu_parse_entry_creation_valid():
  """Test valid creation of MMLUParseEntry."""
  entry = MMLUParseEntry.create(
- prompt="Test prompt",
+ question="Test question",
  answer="A",
  raw_question="Test question",
  raw_choices=["choice1", "choice2", "choice3", "choice4"],
@@ -78,7 +78,7 @@ def test_mmlu_parse_entry_creation_valid():
  task_name="test_task",
  )
  assert isinstance(entry, MMLUParseEntry)
- assert entry.prompt == "Test prompt"
+ assert entry.question == "Test question"
  assert entry.answer == "A"
  assert entry.raw_choices == ["choice1", "choice2", "choice3", "choice4"]
  assert entry.task_name == "test_task"
@@ -91,7 +91,7 @@ def test_mmlu_parse_entry_creation_invalid(invalid_answer):
  ValueError, match="Invalid answer_letter.*must be one of A, B, C, D"
  ):
  MMLUParseEntry.create(
- prompt="Test prompt",
+ question="Test question",
  answer=invalid_answer,
  raw_question="Test question",
  raw_choices=["choice1", "choice2", "choice3", "choice4"],
@@ -106,10 +106,10 @@ def test_process_entry_base(base_parser, sample_mmlu_entries):

  assert isinstance(entry, MMLUParseEntry)
  assert entry.answer == "B" # Index 1 maps to B
- assert "A. London" in entry.prompt
- assert "B. Paris" in entry.prompt
- assert "C. Berlin" in entry.prompt
- assert "D. Madrid" in entry.prompt
+ assert "A. London" in entry.question
+ assert "B. Paris" in entry.question
+ assert "C. Berlin" in entry.question
+ assert "D. Madrid" in entry.question
  assert entry.raw_question == "What is the capital of France?"
  assert entry.raw_choices == ["London", "Paris", "Berlin", "Madrid"]
  assert entry.raw_answer == "1"
@@ -119,7 +119,7 @@ def test_process_entry_base(base_parser, sample_mmlu_entries):
  def test_mmlu_pro_parse_entry_creation_valid():
  """Test valid creation of MMLUProParseEntry."""
  entry = MMLUProParseEntry.create(
- prompt="Test prompt",
+ question="Test question",
  answer="E", # MMLU Pro supports up to J
  raw_question="Test question",
  raw_choices=["choice1", "choice2", "choice3", "choice4", "choice5"],
@@ -139,7 +139,7 @@ def test_process_entry_mmlu_pro(mmlu_pro_parser, sample_mmlu_pro_entries):

  assert isinstance(entry, MMLUProParseEntry)
  assert entry.answer == "B" # Index 1 maps to B
- assert "O(n log n)" in entry.prompt
+ assert "O(n log n)" in entry.question
  assert entry.task_name == "computer_science"
  assert len(entry.raw_choices) == 6

tests/test_tmlu_parser.py CHANGED
@@ -47,7 +47,7 @@ def sample_tmlu_entries():
  def test_tmlu_parse_entry_creation_valid():
  """Test valid creation of TMLUParseEntry."""
  entry = TMLUParseEntry.create(
- prompt="Test prompt",
+ question="Test question",
  answer="A",
  raw_question="Test question",
  raw_choices=["choice1", "choice2", "choice3", "choice4"],
@@ -57,7 +57,7 @@ def test_tmlu_parse_entry_creation_valid():
  metadata={"source": "test"},
  )
  assert isinstance(entry, TMLUParseEntry)
- assert entry.prompt == "Test prompt"
+ assert entry.question == "Test question"
  assert entry.answer == "A"
  assert entry.raw_choices == ["choice1", "choice2", "choice3", "choice4"]
  assert entry.explanation == "Test explanation"
@@ -71,7 +71,7 @@ def test_tmlu_parse_entry_creation_invalid(invalid_answer):
  ValueError, match="Invalid answer_letter.*must be one of A, B, C, D"
  ):
  TMLUParseEntry.create(
- prompt="Test prompt",
+ question="Test question",
  answer=invalid_answer,
  raw_question="Test question",
  raw_choices=["choice1", "choice2", "choice3", "choice4"],
@@ -140,26 +140,6 @@ def test_different_tasks_parsing(tmlu_parser):
  assert math_count > 0


- def test_system_prompt_override(tmlu_parser):
- """Test overriding the default system prompt."""
- custom_prompt = "Custom system prompt for testing"
- parser = TMLUDatasetParser(system_prompt=custom_prompt)
-
- test_entry = {
- "question": "Test question",
- "A": "Choice A",
- "B": "Choice B",
- "C": "Choice C",
- "D": "Choice D",
- "answer": "A",
- "explanation": "Test explanation",
- "metadata": {"source": "test"},
- }
-
- entry = parser.process_entry(test_entry)
- assert custom_prompt in entry.prompt
-
-
  def test_metadata_handling(tmlu_parser, sample_tmlu_entries):
  """Test proper handling of metadata in entries."""
  entry = tmlu_parser.process_entry(sample_tmlu_entries[0])
tests/test_tw_legal_parser.py CHANGED
@@ -35,7 +35,7 @@ def sample_tw_legal_entries():
  def test_tw_legal_parse_entry_creation_valid():
  """Test valid creation of TWLegalParseEntry."""
  entry = TWLegalParseEntry.create(
- prompt="Test prompt",
+ question="Test question",
  answer="A",
  raw_question="Test question",
  raw_choices=["choice1", "choice2", "choice3", "choice4"],
@@ -43,7 +43,7 @@ def test_tw_legal_parse_entry_creation_valid():
  task_name="default",
  )
  assert isinstance(entry, TWLegalParseEntry)
- assert entry.prompt == "Test prompt"
+ assert entry.question == "Test question"
  assert entry.answer == "A"
  assert entry.raw_choices == ["choice1", "choice2", "choice3", "choice4"]

@@ -55,7 +55,7 @@ def test_tw_legal_parse_entry_creation_invalid(invalid_answer):
  ValueError, match="Invalid answer_letter.*must be one of A, B, C, D"
  ):
  TWLegalParseEntry.create(
- prompt="Test prompt",
+ question="Test question",
  answer=invalid_answer,
  raw_question="Test question",
  raw_choices=["choice1", "choice2", "choice3", "choice4"],
@@ -70,10 +70,10 @@ def test_process_entry(tw_legal_parser, sample_tw_legal_entries):

  assert isinstance(entry, TWLegalParseEntry)
  assert entry.answer == "D"
- assert "A. 法人於法令限制內,有享受權利負擔義務之能力" in entry.prompt
- assert "B. 法人因目的之達到而消滅" in entry.prompt
- assert "C. 法人非依法律之規定,不得成立" in entry.prompt
- assert "D. 法人於登記前,即取得權利能力" in entry.prompt
+ assert "A. 法人於法令限制內,有享受權利負擔義務之能力" in entry.question
+ assert "B. 法人因目的之達到而消滅" in entry.question
+ assert "C. 法人非依法律之規定,不得成立" in entry.question
+ assert "D. 法人於登記前,即取得權利能力" in entry.question
  assert entry.raw_question == "依民法規定,下列關於法人之敘述,何者錯誤?"
  assert len(entry.raw_choices) == 4

@@ -122,24 +122,6 @@ def test_data_parsing(tw_legal_parser):
  assert all(entry.answer in {"A", "B", "C", "D"} for entry in parsed_data)


- def test_system_prompt_override(tw_legal_parser):
- """Test overriding the default system prompt."""
- custom_prompt = "Custom system prompt for testing"
- parser = TWLegalDatasetParser(system_prompt=custom_prompt)
-
- test_entry = {
- "question": "Test question",
- "A": "Choice A",
- "B": "Choice B",
- "C": "Choice C",
- "D": "Choice D",
- "answer": "A",
- }
-
- entry = parser.process_entry(test_entry)
- assert custom_prompt in entry.prompt
-
-
  def test_get_dataset_description(tw_legal_parser):
  """Test getting dataset description for Taiwan Legal parser."""
  description = tw_legal_parser.get_dataset_description()