Commit 5e7e2b6 by Iker
Parent: c543076

Update README.md

Files changed (1): README.md (+28 -10)

README.md CHANGED
@@ -68,8 +68,10 @@ A model finetuned with the [NoticIA Dataset](https://huggingface.co/datasets/Iker/NoticIA)
 
 # Usage example:
 ```python
-from datasets import load_dataset
-from transformers import pipeline
+import torch  # pip install torch
+from datasets import load_dataset  # pip install datasets
+from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig  # pip install transformers
+
 
 def prompt(
     headline: str,
@@ -103,21 +105,37 @@ def prompt(
         f"{body}\n"
     )
 
-dataset = load_dataset("Iker/NoticIA")
-
-example = dataset["test"][0]
-
-model_input = prompt(headline=example["web_headline"], body=example["web_text"])
+dataset = load_dataset("Iker/NoticIA")
+example = dataset["test"][0]
 
-pipe = pipeline("text-generation", model="Iker/ClickbaitFighter-2B", device_map="auto")
+user_prompt = prompt(headline=example["web_headline"], body=example["web_text"])
 
-summary = pipe(model_input)
+tokenizer = AutoTokenizer.from_pretrained("Iker/ClickbaitFighter-2B")
+model = AutoModelForCausalLM.from_pretrained(
+    "Iker/ClickbaitFighter-2B", torch_dtype=torch.bfloat16, device_map="auto"
+)
 
-print(summary)
+formatted_prompt = tokenizer.apply_chat_template(
+    [{"role": "user", "content": user_prompt}],
+    tokenize=False,
+    add_generation_prompt=True,
+)
+
+model_inputs = tokenizer(
+    [formatted_prompt], return_tensors="pt", add_special_tokens=False
+)
+
+model_output = model.generate(
+    **model_inputs.to(model.device),
+    generation_config=GenerationConfig(
+        max_new_tokens=32,
+        min_new_tokens=1,
+        do_sample=False,
+        num_beams=1,
+        use_cache=True,
+    ),
+)
+
+summary = tokenizer.batch_decode(model_output, skip_special_tokens=True)[0]
+
+print(summary.strip().split("\n")[-1])  # Get only the summary, without the prompt.
 ```
 
 # Evaluation Results
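
For readers who want to run the snippet end to end: the diff context elides the body of the `prompt` helper, showing only its signature and the closing `f"{body}\n"`. Below is a minimal, hypothetical stand-in consistent with those visible pieces; the instruction text is an assumption (the README's actual wording, like the NoticIA dataset itself, is in Spanish) and the model card's real implementation should be used instead.

```python
# Hypothetical reconstruction of the elided `prompt` helper, inferred from the
# visible signature and the trailing f"{body}\n" context line. The instruction
# text below is an assumption; the README's actual wording differs.
def prompt(headline: str, body: str) -> str:
    return (
        "Below is a clickbait headline and the article it links to. "
        "Write a one-sentence summary that reveals what the headline withholds.\n"
        f"Headline: {headline}\n"
        f"{body}\n"
    )
```

Note that the committed example pins greedy decoding (`do_sample=False`, `num_beams=1`), so the printed summary is deterministic for a given input.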