Norod78 committed on
Commit
d9673b7
1 Parent(s): d696c78

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +68 -0
README.md CHANGED
@@ -1,3 +1,71 @@
1
  ---
 
 
 
 
 
 
 
 
2
  license: mit
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ language: he
3
+
4
+ widget:
5
+ - text: "האיש האחרון עלי אדמות ישב לבד בחדרו כשלפתע נשמעה נקישה"
6
+ - text: "שם היצירה: "
7
+ - text: "\n\n שם היצירה:"
8
+ - text: "\n\n\n"
9
+
10
  license: mit
11
  ---
12
+
13
+ # Hebrew-GPT2-345M-Stage
14
+
15
+ An undertrained GPT-2-based Hebrew text generation model, which I lightly trained in 2020 on text from "Bama Hadasha" ("במה חדשה").
16
+
17
+ ## Dataset
18
+
19
+ ### Around 10% of [stage.co.il](http://stage.co.il/)
20
+
21
+
22
+ #### Simple usage sample code
23
+
24
+ ```python
25
+
26
+
27
+ import os
28
+ os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
29
+
30
+ from transformers import pipeline, set_seed
31
+ import random
32
+
33
# The same Hub id is passed as both the model and the tokenizer.
model_id = "Norod78/Hebrew-GPT2-345M-Stage"
text_generator = pipeline(
    "text-generation",
    model=model_id,
    tokenizer=model_id,
    device_map="auto",
)

# Sampling settings read by text_generation().
max_length = 256
top_k = 70
top_p = 0.92
temperature = 1.0

# Upper bound for seeds; the starting seed is drawn at random from
# 0..max_seed and then advanced by text_generation() on every call.
max_seed = 2**32 - 1
global_seed = random.randint(0, max_seed)
41
+
42
def text_generation(input_text: str = '') -> str:
    """Generate one sampled continuation for ``input_text`` and tidy it up.

    Every call advances the module-level ``global_seed`` by one (wrapping
    to 0 once it reaches ``max_seed``) and passes it to ``set_seed`` before
    sampling, so consecutive calls use different seeds.

    Args:
        input_text: Prompt for the model. ``None`` or an empty string is
            replaced by a single newline.

    Returns:
        The ``"generated_text"`` of the first returned sequence with the
        ``<|startoftext|>`` marker and carriage returns removed, double
        newlines collapsed to one, tabs turned into spaces, ``<|pad|>``
        shown as `` * ``, doubled double-quotes collapsed, and surrounding
        whitespace stripped.
    """
    global global_seed

    global_seed += 1
    if global_seed >= max_seed:
        global_seed = 0

    # Truthiness covers both None and the empty string.
    if not input_text:
        input_text = "\n"

    set_seed(global_seed)
    outputs = text_generator(
        input_text,
        max_length=max_length,
        top_k=top_k,
        top_p=top_p,
        temperature=temperature,
        do_sample=True,
        repetition_penalty=1.4,
        num_return_sequences=1,
    )

    # Only one sequence is requested, so the first entry is the result.
    raw_text = outputs[0]["generated_text"]
    parsed_text = (
        raw_text.replace("<|startoftext|>", "")
        .replace("\r", "")
        .replace("\n\n", "\n")
        .replace("\t", " ")
        .replace("<|pad|>", " * ")
        .replace("\"\"", "\"")
        .strip()
    )
    return parsed_text
61
+
62
def main():
    """Generate text for one fixed sample prompt and print the result."""
    # The title prefix is concatenated in front of the sample title.
    title_prefix = "\n\n שם היצירה: "
    full_prompt = title_prefix + "חגבים ירוקים מקפצים בשדה"
    generated = text_generation(input_text=full_prompt)
    print("result : \n" + str(generated))


if __name__ == "__main__":
    main()
70
+ ```
71
+