Ericu950 committed
Commit 4f40538
1 parent: cdf638d

Update README.md

Files changed (1):
  1. README.md +16 -47
README.md CHANGED
@@ -10,13 +10,9 @@ tags:
 - epigraphy
 - philology
 ---
-
 # Papy_1_Llama-3.1-8B-Instruct_date
-
 This is a fine-tuned version of the Llama-3.1-8B-Instruct model, specialized in assigning a date to Greek documentary papyri. On a test set of 1,856 unseen papyri its predictions were, on average, 21.7 years away from the actual date spans.
-
 ## Dataset
-
 This model was finetuned on the Ericu950/Papyri_1 dataset, which consists of Greek documentary papyri editions and their corresponding dates and geographical attributions sourced from the amazing Papyri.info.
 
 ## Usage
@@ -29,16 +25,12 @@ To run the model on a GPU with large memory capacity, follow these steps:
 import json
 from transformers import pipeline, AutoTokenizer, LlamaForCausalLM
 import torch
-
 model_id = "Ericu950/Papy_1_Llama-3.1-8B-Instruct_date"
-
 model = LlamaForCausalLM.from_pretrained(
     model_id,
     device_map="auto",
 )
-
 tokenizer = AutoTokenizer.from_pretrained(model_id)
-
 generation_pipeline = pipeline(
     "text-generation",
     model=model,
@@ -70,46 +62,37 @@ papyrus_edition = """
 εφοδον το τε βλαβοσ και επιτιμον αργυριου δραχμασ 0 και εισ το δημοσιον τασ ισασ και μηθεν
 ησσον· δ -----ιων ομολογιαν συνεχωρησεν·
 """
-
 system_prompt = "Date this papyrus fragment to an exact year!"
-
 input_messages = [
     {"role": "system", "content": system_prompt},
     {"role": "user", "content": papyrus_edition},
 ]
-
 terminators = [
     tokenizer.eos_token_id,
     tokenizer.convert_tokens_to_ids("<|eot_id|>")
 ]
-
 outputs = generation_pipeline(
     input_messages,
-    max_new_tokens=13,
+    max_new_tokens=4,
     num_beams=45, # Set this as high as your memory will allow!
-    num_return_sequences=3,
+    num_return_sequences=1,
     early_stopping=True,
 )
-
 beam_contents = []
 for output in outputs:
     generated_text = output.get('generated_text', [])
     for item in generated_text:
         if item.get('role') == 'assistant':
             beam_contents.append(item.get('content'))
-
-real_response = "Oxyrynchos"
-
-print(f"Place of origin: {real_response}")
+real_response = "71 or 72 AD"
+print(f"Year: {real_response}")
 for i, content in enumerate(beam_contents, start=1):
     print(f"Suggestion {i}: {content}")
 ```
 ### Expected Output:
 ```
-Place of origin: Oxyrynchos
-Suggestion 1: Oxyrhynchos
-Suggestion 2: Antinoopolis
-Suggestion 3: Alexandria
+Year: 71 or 72 AD
+Suggestion 1: 71
 ```
 ## Usage on free tier in Google Colab
 
@@ -135,18 +118,15 @@ os._exit(00)
 ```python
 from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline
 import torch
-
 quant_config = BitsAndBytesConfig(
     load_in_4bit=True,
     bnb_4bit_quant_type="nf4",
     bnb_4bit_use_double_quant=True,
     bnb_4bit_compute_dtype=torch.bfloat16
 )
-
-model = AutoModelForCausalLM.from_pretrained("Ericu950/Papy_1_Llama-3.1-8B-Instruct_place",
+model = AutoModelForCausalLM.from_pretrained("Ericu950/Papy_1_Llama-3.1-8B-Instruct_date",
     device_map = "auto", quantization_config = quant_config)
-tokenizer = AutoTokenizer.from_pretrained("Ericu950/Papy_1_Llama-3.1-8B-Instruct_place")
-
+tokenizer = AutoTokenizer.from_pretrained("Ericu950/Papy_1_Llama-3.1-8B-Instruct_date")
 generation_pipeline = pipeline(
     "text-generation",
     model=model,
@@ -176,42 +156,31 @@ papyrus_edition = """
 παραβαινειν, εκτεινειν δε τον παραβησομενον τωι υιωι διοσκορωι η τοισ παρ αυτου καθ εκαστην
 εφοδον το τε βλαβοσ και επιτιμον αργυριου δραχμασ 0 και εισ το δημοσιον τασ ισασ και μηθεν
 ησσον· δ -----ιων ομολογιαν συνεχωρησεν·"""
-
-system_prompt = "Assign this papyrus fragment to an exact place!"
-
+system_prompt = "Date this papyrus fragment to an exact year!"
 input_messages = [
     {"role": "system", "content": system_prompt},
     {"role": "user", "content": papyrus_edition},
 ]
-
 outputs = generation_pipeline(
     input_messages,
-    max_new_tokens=13,
+    max_new_tokens=4,
     num_beams=10,
-    num_return_sequences=3,
+    num_return_sequences=1,
     early_stopping=True,
 )
-
 beam_contents = []
 for output in outputs:
     generated_text = output.get('generated_text', [])
     for item in generated_text:
         if item.get('role') == 'assistant':
             beam_contents.append(item.get('content'))
-
-real_response = "Oxyrynchos"
-
-print(f"Place of origin: {real_response}")
+real_response = "71 or 72 AD"
+print(f"Year: {real_response}")
 for i, content in enumerate(beam_contents, start=1):
     print(f"Suggestion {i}: {content}")
 ```
 ### Expected Output:
 ```
-Place of origin: Oxyrynchos
-Suggestion 1: Oxyrhynchos
-Suggestion 2: Antinoopolis
-Suggestion 3: Alexandria
-```
-
-
-
+Year: 71 or 72 AD
+Suggestion 1: 71
+```
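For context on the headline figure in the model card above ("on average, 21.7 years away from the actual date spans"): one natural way to score a predicted year against a gold date span is to count zero error when the prediction falls inside the span and otherwise the distance to the nearest endpoint, averaged over the test set. The sketch below only illustrates that reading; it is an assumption about the metric, not code from the model's actual evaluation, and the example numbers are made up.

```python
# Hypothetical sketch of a span-aware dating error, averaged over a test set.
# This interprets "years away from the actual date spans"; the model card
# does not publish its exact evaluation code.

def years_from_span(predicted_year: int, span_start: int, span_end: int) -> int:
    """0 if the prediction lies inside the gold span, else distance to the nearest endpoint."""
    if span_start <= predicted_year <= span_end:
        return 0
    return min(abs(predicted_year - span_start), abs(predicted_year - span_end))

# Toy examples: (model prediction, gold span start, gold span end)
examples = [
    (71, 71, 72),     # exact hit, like the papyrus in the README -> 0 years off
    (150, 100, 125),  # 25 years past the end of the span
    (-30, -25, -20),  # BC dates work the same way -> 5 years off
]

errors = [years_from_span(p, lo, hi) for p, lo, hi in examples]
print(f"Average error: {sum(errors) / len(errors):.1f} years")  # Average error: 10.0 years
```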