Doron Adler committed on
Commit b6c25db • 1 Parent(s): c8ddd98

Updated model

app.py CHANGED
@@ -1,8 +1,10 @@
 # -*- coding: utf-8 -*-
 
+import os
+os.environ["TOKENIZERS_PARALLELISM"] = "false"
+
 import argparse
 import re
-import os
 
 import streamlit as st
 import random
@@ -11,8 +13,6 @@ import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import tokenizers
 
-#os.environ["TOKENIZERS_PARALLELISM"] = "false"
-
 random.seed(None)
 suggested_text_list = ['השד הופיע מול','קאלי שלפה את','פעם אחת לפני שנים רבות', 'הארי פוטר חייך חיוך נבוך', 'ואז הפרתי את כל כללי הטקס כש']
 
@@ -24,7 +24,9 @@ def load_model(model_name):
 
 def extend(input_text, max_size=20, top_k=50, top_p=0.95, temperature=0.7):
     if len(input_text) == 0:
-        input_text = ""
+        input_text = "<|startoftext|>"
+    else:
+        input_text = "<|startoftext|>" + input_text
 
     encoded_prompt = tokenizer.encode(
         input_text, add_special_tokens=False, return_tensors="pt")
@@ -83,7 +85,7 @@ if __name__ == "__main__":
     model, tokenizer = load_model("./model")
 
     stop_token = "<|endoftext|>"
-    new_lines = "\n\n\n"
+    new_lines = "<|pad|>"
 
     np.random.seed(None)
     random_seed = np.random.randint(10000,size=1)
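
For context, a minimal sketch of how the retrained model's special tokens fit together at inference time. The token strings ("<|startoftext|>", "<|endoftext|>", "<|pad|>") and the sampling parameters come from the diff above; the generation call and the trimming at the end are illustrative assumptions, not the app's exact code:

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load from the committed ./model directory.
tokenizer = AutoTokenizer.from_pretrained("./model")
model = AutoModelForCausalLM.from_pretrained("./model")

# The new extend() always prefixes the prompt with <|startoftext|>.
prompt = "<|startoftext|>" + "פעם אחת לפני שנים רבות"
input_ids = tokenizer.encode(prompt, add_special_tokens=False, return_tensors="pt")

with torch.no_grad():
    output = model.generate(
        input_ids,
        max_length=input_ids.shape[1] + 20,  # assumed to mirror max_size=20
        top_k=50, top_p=0.95, temperature=0.7,
        do_sample=True,
        pad_token_id=tokenizer.pad_token_id,
    )

text = tokenizer.decode(output[0], skip_special_tokens=False)
# Cut at the stop token and strip the markers, as the new constants suggest.
text = text.split("<|endoftext|>")[0]
text = text.replace("<|pad|>", "").replace("<|startoftext|>", "")
print(text)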
model/added_tokens.json CHANGED
File without changes
model/config.json CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "./FantasyChildrenScifi-hebrew-gpt_neo-small/model",
+  "_name_or_path": "Norod78/hebrew-gpt_neo-small",
   "activation_function": "gelu_new",
   "architectures": [
     "GPTNeoForCausalLM"
model/merges.txt CHANGED
File without changes
model/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:25e83f166977308069becae45ac59d48a2c08c0de8b3135a9acb63455fc0aec9
+oid sha256:25bdb066c638e0f8e2da703f4ecd9223448fcf282fd46e354a47e817656af8e5
 size 551197393
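
Only the Git LFS pointer is versioned here: the weights changed (new sha256 oid) while the size stayed byte-identical at 551197393. A standard hashlib sketch (not part of the commit) to verify a locally downloaded file against the new pointer:

import hashlib

def lfs_oid(path, chunk_size=1 << 20):
    """SHA-256 of the file contents, matching the 'oid sha256:...' line."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

assert lfs_oid("model/pytorch_model.bin") == \
    "25bdb066c638e0f8e2da703f4ecd9223448fcf282fd46e354a47e817656af8e5"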
model/special_tokens_map.json CHANGED
File without changes
model/tokenizer.json CHANGED
File without changes
model/tokenizer_config.json CHANGED
@@ -7,7 +7,7 @@
   "errors": "replace",
   "full_tokenizer_file": null,
   "max_len": 1024,
-  "name_or_path": "./FantasyChildrenScifi-hebrew-gpt_neo-small/model",
+  "name_or_path": "Norod78/hebrew-gpt_neo-small",
   "pad_token": "<|pad|>",
   "special_tokens_map_file": "special_tokens_map.json",
   "tokenizer_class": "GPT2Tokenizer",
@@ -18,5 +18,6 @@
     "normalized": true,
     "rstrip": false,
     "single_word": false
-  }
+  },
+  "unknown_token": "<|unknown|>"
 }
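
The new "unknown_token" entry sits alongside the existing "pad_token". Note that transformers' GPT2Tokenizer takes its unknown token from the "unk_token" key, so this extra key is likely carried along as auxiliary metadata rather than consumed directly. A minimal check of what the committed tokenizer actually exposes (assuming it loads from ./model):

from transformers import AutoTokenizer

# Load the tokenizer committed under ./model.
tokenizer = AutoTokenizer.from_pretrained("./model")

print(tokenizer.pad_token)     # expected: <|pad|>, per tokenizer_config.json
print(tokenizer.pad_token_id)  # its vocabulary id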
model/vocab.json CHANGED
File without changes