File size: 2,113 Bytes
5832f57
20b3b4a
5832f57
20b3b4a
5832f57
 
 
 
 
 
 
 
 
 
9ff00d4
20b3b4a
5832f57
20b3b4a
 
 
 
 
1e91476
 
20b3b4a
1e91476
 
 
 
 
 
20b3b4a
1e91476
 
 
20b3b4a
1e91476
 
 
 
 
 
 
20b3b4a
1e91476
 
 
 
 
 
 
 
20b3b4a
1e91476
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
from typing import List
import re
from langchain.schema import BaseOutputParser
from itertools import chain

class CustomStringOutputParser(BaseOutputParser[List[str]]):
    """Split raw LLM output into the list of "texter" segments it contains.

    Everything from the first occurrence of ``helper`` onward is discarded;
    the remainder is split on ``texter:`` and on a newline followed by
    ``texter``, and each resulting segment is whitespace-stripped.
    """

    @property
    def _type(self) -> str:
        """Return the identifier string of this parser type."""
        return "str"

    def parse(self, text: str) -> List[str]:
        """Parse the output of an LLM call into a list of stripped segments.

        Args:
            text: Raw text produced by the LLM.

        Returns:
            The segments of ``text`` preceding the first ``helper``
            occurrence, split on ``texter:`` and on a newline followed by
            ``texter``, each stripped of surrounding whitespace. Empty
            segments are kept, so the list always has at least one element.
        """
        # Keep only what precedes the first "helper". A separate cut on
        # "\nhelper" or a trailing-newline trim is unnecessary: no "helper"
        # remains after this step, and every segment is strip()-ed below.
        # NOTE(review): this matches the bare word "helper" anywhere in the
        # text (not only a "helper:" speaker tag) -- confirm that is intended.
        kept = text.partition("helper")[0]

        # Split on both marker spellings and flatten into a single sequence.
        segments = chain.from_iterable(
            piece.split("\ntexter") for piece in kept.split("texter:")
        )
        return [segment.strip() for segment in segments]
    
# class CustomINSTOutputParser(BaseOutputParser[List[str]]):
#     """Parse the output of an LLM call to a list."""

#     name = "Kit"
#     name_rx = re.compile(r""+ name + r":|" + name.lower() + r":")
#     whispers = re.compile((r"([\(]).*?([\)])"))
#     reactions = re.compile(r"([\*]).*?([\*])")
#     double_spaces = re.compile(r"  ")
#     quotation_rx = re.compile('"')

#     @property
#     def _type(self) -> str:
#         return "str"

    # def parse_whispers(self, text: str) -> str:
    #     text = self.name_rx.sub("", text).strip()
    #     text = self.reactions.sub("", text).strip()
    #     text = self.whispers.sub("", text).strip()
    #     text = self.double_spaces.sub(r" ", text).strip()
    #     text = self.quotation_rx.sub("", text).strip()
    #     return text 
    
    # def parse_split(self, text: str) -> str:
    #     text = text.split("[INST]")[0]
    #     text_list = text.split("[/INST]")
    #     text_list = [x.split("</s>") for x in text_list]
    #     text_list = [x.strip() for x in list(chain.from_iterable(text_list))]
    #     text_list = [x.split("\n\n") for x in text_list]
    #     text_list = [x.strip().rstrip("\n") for x in list(chain.from_iterable(text_list))]
    #     return text_list
    
    # def parse(self, text: str) -> str:
    #     text = self.parse_whispers(text)
    #     text_list = self.parse_split(text)
    #     return text_list