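"""Helpers around MBZUAI/LaMini-Flan-T5-248M: a LangChain-wrapped pipeline
loader plus two ready-made prompt templates (summarize, generate_title)."""
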
# Load model directly
from langchain.llms import HuggingFacePipeline
from transformers import (
    AutoTokenizer,
    AutoModelForSeq2SeqLM,
    pipeline,
    GenerationConfig,
)

model_id = "MBZUAI/LaMini-Flan-T5-248M"
tokenizer = AutoTokenizer.from_pretrained(model_id)
# device_map="cuda:0" pins the whole model to the first GPU; use "auto"
# (or drop the argument) to fall back to CPU on machines without CUDA.
model = AutoModelForSeq2SeqLM.from_pretrained(model_id, device_map="cuda:0")
gen_config = GenerationConfig.from_pretrained(model_id)

class lamini:
    def __init__(self) -> None:
        pass

    def load_model(
        self,
        task="text2text-generation",
        **kwargs
    ):
        """Build a text2text-generation pipeline for
        MBZUAI/LaMini-Flan-T5-248M and wrap it for LangChain.

        Returns:
            HuggingFacePipeline: the wrapped pipeline, usable as a
            LangChain LLM.
        """

        max_length = kwargs.get("max_length", 512)
        temperature = kwargs.get("temperature", 0)
        top_p = kwargs.get("top_p", 0.95)
        repetition_penalty = kwargs.get("repetition_penalty", 1.15)

        pipe = pipeline(
            task,
            model=model,
            tokenizer=tokenizer,
            generation_config=gen_config,
            max_length=max_length,
            top_p=top_p,
            temperature=temperature,
            repetition_penalty=repetition_penalty,
        )

        llm = HuggingFacePipeline(pipeline=pipe)
        return llm
    
    class templates:
        """Prompt templates that run the shared model directly."""

        def summarize(self, text):
            # Prepend a fixed instruction so the instruction-tuned model
            # produces a summary of the input text.
            instructions = "summarize for better understanding: "
            pipe = pipeline(
                "text2text-generation",
                model=model,
                tokenizer=tokenizer,
                max_length=400,
                generation_config=gen_config,
                temperature=0,
                top_p=0.95,
                repetition_penalty=1.15,
            )
            return pipe(instructions + text)
        
        def generate_title(self, text):
            instructions = "generate a perfect title for the following text in 6 words: "

            pipe = pipeline(
                "text2text-generation",
                model=model,
                tokenizer=tokenizer,
                max_length=60,
                generation_config=gen_config,
                temperature=0,
                top_p=0.95,
                repetition_penalty=1.15,
            )

            return pipe(instructions + text)
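

# Usage sketch (not part of the original module; the prompts and sample text
# below are illustrative assumptions). load_model() returns a LangChain LLM,
# while the templates call the raw transformers pipeline and return a list of
# {"generated_text": ...} dicts.
if __name__ == "__main__":
    llm = lamini().load_model(max_length=256)
    # Older LangChain call style; newer releases use llm.invoke(...).
    print(llm("Explain what a tokenizer does in one sentence."))

    helper = lamini.templates()
    sample = (
        "Transformers process sequences with self-attention, letting every "
        "token attend to every other token in the input."
    )
    print(helper.summarize(sample)[0]["generated_text"])
    print(helper.generate_title(sample)[0]["generated_text"])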