Norod78 committed
Commit 00e184d • Parent: de72a99

Update to use Gradio, Deepspeed and TextIteratorStreamer
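
For context on the new app.py: transformers' TextIteratorStreamer is fed by model.generate running on a background thread, while the Gradio callback iterates the streamer and yields partial text to the UI. A minimal sketch of that pattern (the model path and prompt here are illustrative, not the Space's exact values):

    # Minimal sketch of the streaming pattern this commit adopts.
    from threading import Thread

    from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

    tok = AutoTokenizer.from_pretrained("./model")      # illustrative path
    model = AutoModelForCausalLM.from_pretrained("./model")

    streamer = TextIteratorStreamer(tok, timeout=5.0)
    inputs = tok(["Once upon a time"], return_tensors="pt")  # illustrative prompt

    # generate() blocks until finished, so it runs on a worker thread;
    # the streamer yields decoded text chunks as tokens arrive.
    thread = Thread(target=model.generate,
                    kwargs=dict(inputs, streamer=streamer, max_new_tokens=64))
    thread.start()
    for chunk in streamer:
        print(chunk, end="", flush=True)
    thread.join()

The commit's generate() function below is the same idea, with early-stop handling layered on top.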

Files changed (3):
  1. README.md +1 -1
  2. app.py +62 -130
  3. requirements.txt +2 -1
README.md CHANGED
@@ -3,7 +3,7 @@ title: Hebrew GPT Neo - Science Fiction and Fantasy
 emoji: 🧙‍♀️
 colorFrom: yellow
 colorTo: blue
-sdk: streamlit
+sdk: gradio
 app_file: app.py
 pinned: false
 license: mit
app.py CHANGED
@@ -1,134 +1,66 @@
 # -*- coding: utf-8 -*-
 
-import os
-os.environ["TOKENIZERS_PARALLELISM"] = "false"
-
-import argparse
-import re
-
-import streamlit as st
-import random
-import numpy as np
+import gradio as gr
 import torch
-from transformers import AutoTokenizer, AutoModelForCausalLM
-import tokenizers
+from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
+from threading import Thread
+import os
 
-random.seed(None)
-suggested_text_list = ['השד הופיע מול', 'קאלי שלפה את', 'פעם אחת לפני שנים רבות', 'הארי פוטר חייך חיוך נבוך', 'ואז הפרתי את כל כללי הטקס כש']
-
-@st.cache_resource
-def load_model(model_name):
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
-    model = AutoModelForCausalLM.from_pretrained(model_name)
-    return model, tokenizer
-
-def extend(input_text, max_size=20, top_k=50, top_p=0.95, temperature=0.7):
-    if len(input_text) == 0:
-        input_text = "<|startoftext|>"
-    else:
-        input_text = "<|startoftext|>" + input_text
-
-    encoded_prompt = tokenizer.encode(
-        input_text, add_special_tokens=False, return_tensors="pt")
-
-    encoded_prompt = encoded_prompt.to(device)
-
-    if encoded_prompt.size()[-1] == 0:
-        input_ids = None
-    else:
-        input_ids = encoded_prompt
-
-    output_sequences = model.generate(
-        input_ids=input_ids,
-        max_length=max_size + len(encoded_prompt[0]),
-        top_k=top_k,
-        top_p=top_p,
-        temperature=temperature,
-        do_sample=True,
-        repetition_penalty=2.0,
-        num_return_sequences=1)
-
-    # Remove the batch dimension when returning multiple sequences
-    if len(output_sequences.shape) > 2:
-        output_sequences.squeeze_()
-
-    generated_sequences = []
-
-    for generated_sequence_idx, generated_sequence in enumerate(output_sequences):
-        generated_sequence = generated_sequence.tolist()
-
-        # Decode text
-        text = tokenizer.decode(generated_sequence, clean_up_tokenization_spaces=True)
-
-        # Remove all text after the stop token
-        text = text[: text.find(stop_token) if stop_token else None]
-
-        # Remove all text after 3 newlines
-        text = text[: text.find(new_lines) if new_lines else None]
-
-        # Add the prompt at the beginning of the sequence. Remove the excess text that was used for pre-processing
-        total_sequence = (
-            input_text + text[len(tokenizer.decode(encoded_prompt[0], clean_up_tokenization_spaces=True)) :]
-        )
-
-        generated_sequences.append(total_sequence)
-
-    parsed_text = total_sequence.replace("<|startoftext|>", "").replace("\r", "").replace("\n\n", "\n")
-    if len(parsed_text) == 0:
-        parsed_text = "שגיאה"
-    return parsed_text
-
-
-if __name__ == "__main__":
-    st.title("Hebrew text generator: Science Fiction and Fantasy (GPT-Neo)")
-    model, tokenizer = load_model("./model")
-
-    stop_token = "<|endoftext|>"
-    new_lines = "<|pad|>"
-
-    np.random.seed(None)
-    random_seed = np.random.randint(10000, size=1)
-
-    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    n_gpu = 0 if torch.cuda.is_available() == False else torch.cuda.device_count()
-
-    torch.manual_seed(random_seed)
-    if n_gpu > 0:
-        torch.cuda.manual_seed_all(random_seed)
-
-    model.to(device)
-
-    text_area = st.text_area("Enter the first few words (or leave blank), tap on \"Generate Text\" below. Tapping again will produce a different result.", 'האדם האחרון עלי אדמות ישב לבד בחדרו כשלפתע נשמעה דפיקה')
-
-    st.sidebar.subheader("Configurable parameters")
-
-    max_len = st.sidebar.slider("Max-Length", 0, 256, 160, help="The maximum length of the sequence to be generated.")
-    top_k = st.sidebar.slider("Top-K", 0, 100, 40, help="The number of highest probability vocabulary tokens to keep for top-k filtering.")
-    top_p = st.sidebar.slider("Top-P", 0.0, 1.0, 0.92, help="If set to float < 1, only the most probable tokens with probabilities that add up to top_p or higher are kept for generation.")
-    temperature = st.sidebar.slider("Temperature", 0.0, 1.0, 1.0, help="The value used to modulate the randomness of the output tokens.")
-
-    if st.button("Generate Text"):
-        with st.spinner(text="Generating results..."):
-            st.subheader("Result")
-            print(f"device:{device}, n_gpu:{n_gpu}, random_seed:{random_seed}, maxlen:{max_len}, top_k:{top_k}, top_p:{top_p}")
-            if len(text_area.strip()) == 0:
-                text_area = random.choice(suggested_text_list)
-            result = extend(input_text=text_area,
-                            max_size=int(max_len),
-                            top_k=int(top_k),
-                            top_p=float(top_p),
-                            temperature=float(temperature))
-
-            print("Done length: " + str(len(result)) + " bytes")
-            #<div class="rtl" dir="rtl" style="text-align:right;">
-            st.markdown(f"<p dir=\"rtl\" style=\"text-align:right;\"> {result} </p>", unsafe_allow_html=True)
-            st.write("\n\nResult length: " + str(len(result)) + " bytes\n Random seed: " + str(random_seed) + "\ntop_k: " + str(top_k) + "\ntop_p: " + str(top_p) + "\ntemperature: " + str(temperature) + "\nmax_len: " + str(max_len) + "\ndevice: " + str(device) + "\nn_gpu: " + str(n_gpu))
-            print(f"\"{result}\"")
-
-    st.markdown(
-        """Hebrew text generation model based on EleutherAI's gpt-neo architecture. Originally trained on a TPUv3-8 which was made available to me via the [TPU Research Cloud Program](https://sites.research.google/trc/). The model was then slightly fine-tuned upon science fiction and fantasy text."""
-    )
-
-    st.markdown("<footer><hr><p style=\"font-size:14px\">The site is fan made and is not affiliated with any author in any way.</p><p style=\"font-size:12px\">By <a href=\"https://linktr.ee/Norod78\">Doron Adler</a></p></footer> ", unsafe_allow_html=True)
+
+model_id = './model'
+
+CUDA_AVAILABLE = torch.cuda.is_available()
+device = torch.device("cuda" if CUDA_AVAILABLE else "cpu")
+
+generator = pipeline('text-generation', model=model_id,
+                     tokenizer=model_id,
+                     load_in_8bit=True,
+                     device=device)
+
+early_stop_pattern = "\n\n\n"
+print(f'Early stop pattern = \"{early_stop_pattern}\"')
+
+model = generator.model
+tok = generator.tokenizer
+
+stop_token = tok.eos_token
+print(f'stop_token = \"{stop_token}\"')
+
+def generate(text=""):
+    print("Create streamer")
+    yield "[אנא המתינו לתשובה]"
+    streamer = TextIteratorStreamer(tok, timeout=5.)
+    if len(text) == 0:
+        text = "\n"
+
+    inputs = tok([text], return_tensors="pt").to(device)
+    generation_kwargs = dict(inputs, streamer=streamer, repetition_penalty=2.5,
+                             do_sample=True, top_k=40, top_p=0.2, temperature=0.4,
+                             num_beams=1, max_new_tokens=128,
+                             pad_token_id=model.config.eos_token_id,
+                             early_stopping=True, no_repeat_ngram_size=4)
+    thread = Thread(target=model.generate, kwargs=generation_kwargs)
+    thread.start()
+    generated_text = ""
+    for new_text in streamer:
+        yield generated_text + new_text
+        print(new_text, end="")
+        generated_text += new_text
+        if (early_stop_pattern in generated_text) or (stop_token in new_text):
+            generated_text = generated_text[: generated_text.find(early_stop_pattern) if early_stop_pattern else None]
+            generated_text = generated_text[: generated_text.find(stop_token) if stop_token else None]
+            streamer.end()
+            print("\n--\n")
+            yield generated_text
+            return generated_text
+
+    return generated_text
+
+demo = gr.Interface(
+    title="Hebrew text generator: Science Fiction and Fantasy (GPT-Neo)",
+    fn=generate,
+    inputs=gr.Textbox(label="כתבו כאן את הטקסט שלכם או השאירו ריק", elem_id="input_text"),
+    outputs=gr.Textbox(type="text", label="פה יופיע הטקסט שהמחולל יחולל", elem_id="output_text"),
+    css="#output_text{direction: rtl} #input_text{direction: rtl}",
+    examples=['השד הופיע מול', 'קאלי שלפה את', 'פעם אחת לפני שנים רבות', 'הארי פוטר חייך חיוך נבוך', 'ואז הפרתי את כל כללי הטקס כש'],
+    allow_flagging=False
+)
+
+demo.queue()
+#demo.launch(debug=True)
+demo.launch()
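
One caveat on the pipeline call above: load_in_8bit is a model-loading flag (it also assumes bitsandbytes and a CUDA device), and I am not certain every transformers release forwards it when passed directly to pipeline(); the documented route is model_kwargs. A hedged sketch of that variant:

    from transformers import pipeline

    # Assumes bitsandbytes/accelerate are installed and a GPU is present.
    # Quantization flags reach the model constructor via model_kwargs;
    # no explicit device= here, since 8-bit models are placed by accelerate.
    generator = pipeline(
        "text-generation",
        model="./model",
        tokenizer="./model",
        model_kwargs={"load_in_8bit": True},
    )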
requirements.txt CHANGED
@@ -1,4 +1,5 @@
-streamlit
+gradio
 transformers
 tokenizers
 torch
+deepspeed