picocreator committed on
Commit
fab2242
1 Parent(s): 607417a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +89 -57
app.py CHANGED
@@ -1,5 +1,5 @@
1
  import gradio as gr
2
- import os, gc, copy
3
  from huggingface_hub import hf_hub_download
4
  from pynvml import *
5
 
@@ -39,45 +39,54 @@ model = RWKV(model=model_path, strategy=MODEL_STRAT)
39
  from rwkv.utils import PIPELINE
40
  pipeline = PIPELINE(model, "rwkv_vocab_v20230424")
41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  # Precomputation of the state
43
  def precompute_state(text):
44
  state = None
45
  text_encoded = pipeline.encode(text)
46
  _, state = model.forward(text_encoded, state)
47
- yield dict(state)
48
 
49
  # Precomputing the base instruction set
50
  INSTRUCT_PREFIX = f'''
51
- The following is a set of instruction rules, that can translate spoken text to zombie speak. And vice visa.
52
-
53
- # Zombie Speak Rules:
54
- - Replace syllables with "uh" or "argh"
55
- - Add "uh" and "argh" sounds between words
56
- - Repeat words and letters, especially vowels
57
- - Use broken grammar and omit small words like "the", "a", "is"
58
 
59
- # To go from zombie speak back to English:
60
- - Remove extra "uh" and "argh" sounds
61
- - Replace repeated letters with one instance
62
- - Add omitted small words like "the", "a", "is" back in
63
- - Fix grammar and sentence structure
64
 
65
- # Here are several examples:
 
 
 
66
 
67
- ## English:
68
- "Hello my friend, how are you today?"
69
- ## Zombie:
70
- "Hell-uh-argh myuh fruh-end, hargh-owuh argh yuh-uh toduh-ay?"
71
-
72
- ## Zombie:
73
- "Brargh-ains argh-uh foo-duh"
74
- ## English:
75
- "Brains are food"
76
-
77
- ## English:
78
- "Good morning! How are you today? I hope you are having a nice day. The weather is supposed to be sunny and warm this afternoon. Maybe we could go for a nice walk together and stop to get ice cream. That would be very enjoyable. Well, I will talk to you soon!"
79
- ## Zombie:
80
- "Guh-ood morngh-ing! Hargh-owuh argh yuh-uh toduh-ay? Iuh hargh-ope yuh-uh argh havi-uh-nguh nuh-ice duh-ay. Thuh-uh weath-uh-eruh izzuh suh-pose-duh tuh-uh beh sunn-eh an-duh war-muh thizuh aft-erng-oon. May-buh-uh weh coulduh gargh-oh fargh-oruh nuh-ice wal-guh-kuh toge-the-ruh an-duh stargh-op tuh-uh geh-etuh izz-creem. Tha-at wou-duh beh ve-reh uhn-joy-ab-buhl. Well, I wih-ll targh-alk tuh-uh yuh-oo soo-oon!"
81
 
82
  '''
83
 
@@ -85,8 +94,8 @@ The following is a set of instruction rules, that can translate spoken text to z
85
  PREFIX_STATE = precompute_state(INSTRUCT_PREFIX)
86
 
87
  # Translation logic
88
- def translate(text, target_language, inState=PREFIX_STATE):
89
- prompt = f"Translate the following text to {target_language}\n # Input Text:\n{text}\n\n# Output Text:\n"
90
  ctx = prompt.strip()
91
  all_tokens = []
92
  out_last = 0
@@ -95,7 +104,7 @@ def translate(text, target_language, inState=PREFIX_STATE):
95
 
96
  state = None
97
  if inState != None:
98
- state = dict(inState)
99
 
100
  # Clear GC
101
  gc.collect()
@@ -105,19 +114,43 @@ def translate(text, target_language, inState=PREFIX_STATE):
105
  # Generate things token by token
106
  for i in range(ctx_limit):
107
  out, state = model.forward(pipeline.encode(ctx)[-ctx_limit:] if i == 0 else [token], state)
108
- token = pipeline.sample_logits(out)
109
  if token in [0]: # EOS token
110
  break
111
  all_tokens += [token]
112
  tmp = pipeline.decode(all_tokens[out_last:])
113
  if '\ufffd' not in tmp:
114
  out_str += tmp
115
- yield out_str.strip()
116
  out_last = i + 1
 
 
 
 
 
 
117
 
118
- if "# " in out_str and "\n#" in out_str :
119
- out_str = out_str.split("\n## ")[0].split("\n# ")[0]
120
- yield out_str.strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
 
122
  del out
123
  del state
@@ -127,7 +160,8 @@ def translate(text, target_language, inState=PREFIX_STATE):
127
  # if HAS_GPU == True :
128
  # torch.cuda.empty_cache()
129
 
130
- yield out_str.strip()
 
131
 
132
  # Languages
133
  LANGUAGES = [
@@ -219,6 +253,7 @@ LANGUAGES = [
219
  "Arabic (Tunisian Spoken)",
220
  "Rwanda",
221
  "Zulu",
 
222
  "Bulgarian",
223
  "Swedish",
224
  "Lombard",
@@ -243,37 +278,34 @@ LANGUAGES = [
243
 
244
  # Example data
245
  EXAMPLES = [
246
- ["Brargh-ains argh-uh foo-duh", "English"],
247
- ["I Want to eat your brains", "Zombie Speak"],
248
- ["Hello, how are you?", "French"],
249
- ["Hello, how are you?", "Spanish"],
250
- ["Hello, how are you?", "Chinese"],
251
- ["Bonjour, comment ça va?", "English"],
252
- ["Hola, ¿cómo estás?", "English"],
253
- ["你好吗?", "English"],
254
- ["Guten Tag, wie geht es Ihnen?", "English"],
255
- ["Привет, как ты?", "English"],
256
- ["مرحبًا ، كيف حالك؟", "English"],
257
  ]
258
-
259
  # Gradio interface
260
  with gr.Blocks(title=title) as demo:
261
  gr.HTML(f"<div style=\"text-align: center;\"><h1>RWKV-5 World v2 - {title}</h1></div>")
262
- gr.Markdown("This is the RWKV-5 World v2 1B5 model tailored for translation. With a halloween zombie speak twist")
263
 
264
  # Input and output components
265
- text = gr.Textbox(lines=5, label="Source Text", placeholder="Enter the text you want to translate...", default=EXAMPLES[0][0])
266
- target_language = gr.Dropdown(choices=LANGUAGES, label="Target Language", default=EXAMPLES[0][1])
 
267
  output = gr.Textbox(lines=5, label="Translated Text")
 
 
268
  submit = gr.Button("Translate", variant="primary")
269
 
270
  # Example data
271
- data = gr.Dataset(components=[text, target_language], samples=EXAMPLES, label="Example Translations", headers=["Text", "Target Language"])
272
 
273
  # Button action
274
- submit.click(translate, [text, target_language], [output])
275
- data.click(lambda x: x, [data], [text, target_language])
276
 
277
  # Gradio launch
278
  demo.queue(concurrency_count=1, max_size=10)
279
- demo.launch(share=False)
 
1
  import gradio as gr
2
+ import os, gc, copy, torch
3
  from huggingface_hub import hf_hub_download
4
  from pynvml import *
5
 
 
39
  from rwkv.utils import PIPELINE
40
  pipeline = PIPELINE(model, "rwkv_vocab_v20230424")
41
 
42
+ # State copy
43
+ def universal_deepcopy(obj):
44
+ if hasattr(obj, 'clone'): # Assuming it's a tensor if it has a clone method
45
+ return obj.clone()
46
+ elif isinstance(obj, list):
47
+ return [universal_deepcopy(item) for item in obj]
48
+ else:
49
+ return copy.deepcopy(obj)
50
+
51
+ # For debgging mostly
52
+ def inspect_structure(obj, depth=0):
53
+ indent = " " * depth
54
+ obj_type = type(obj).__name__
55
+
56
+ if isinstance(obj, list):
57
+ print(f"{indent}List (length {len(obj)}):")
58
+ for item in obj:
59
+ inspect_structure(item, depth + 1)
60
+ elif isinstance(obj, dict):
61
+ print(f"{indent}Dict (length {len(obj)}):")
62
+ for key, value in obj.items():
63
+ print(f"{indent} Key: {key}")
64
+ inspect_structure(value, depth + 1)
65
+ else:
66
+ print(f"{indent}{obj_type}")
67
+
68
  # Precomputation of the state
69
  def precompute_state(text):
70
  state = None
71
  text_encoded = pipeline.encode(text)
72
  _, state = model.forward(text_encoded, state)
73
+ return state
74
 
75
  # Precomputing the base instruction set
76
  INSTRUCT_PREFIX = f'''
77
+ You are a translator bot that can translate text to any language.
78
+ And will respond only with the translated text, without additional comments.
 
 
 
 
 
79
 
80
+ ## From English:
81
+ It is not enough to know, we must also apply; it is not enough to will, we must also do.
82
+ ## To Polish:
83
+ Nie wystarczy wiedzieć, trzeba także zastosować; nie wystarczy chcieć, trzeba też działać.
 
84
 
85
+ ## From Spanish:
86
+ La muerte no nos concierne, porque mientras existamos, la muerte no está aquí. Y cuando llega, ya no existimos.
87
+ ## To English:
88
+ Death does not concern us, because as long as we exist, death is not here. And when it does come, we no longer exist.
89
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
 
91
  '''
92
 
 
94
  PREFIX_STATE = precompute_state(INSTRUCT_PREFIX)
95
 
96
  # Translation logic
97
+ def translate(text, source_language, target_language, inState=PREFIX_STATE):
98
+ prompt = f"## From {source_language}:\n{text}\n\n## To {target_language}:\n"
99
  ctx = prompt.strip()
100
  all_tokens = []
101
  out_last = 0
 
104
 
105
  state = None
106
  if inState != None:
107
+ state = universal_deepcopy(inState)
108
 
109
  # Clear GC
110
  gc.collect()
 
114
  # Generate things token by token
115
  for i in range(ctx_limit):
116
  out, state = model.forward(pipeline.encode(ctx)[-ctx_limit:] if i == 0 else [token], state)
117
+ token = pipeline.sample_logits(out, temperature=0.1, top_p=0.5)
118
  if token in [0]: # EOS token
119
  break
120
  all_tokens += [token]
121
  tmp = pipeline.decode(all_tokens[out_last:])
122
  if '\ufffd' not in tmp:
123
  out_str += tmp
 
124
  out_last = i + 1
125
+ else:
126
+ return out_str.strip()
127
+
128
+ if "\n:" in out_str :
129
+ out_str = out_str.split("\n\nHuman:")[0].split("\nHuman:")[0]
130
+ return out_str.strip()
131
 
132
+ if "{source_language}:" in out_str :
133
+ out_str = out_str.split("{source_language}:")[0]
134
+ return out_str.strip()
135
+
136
+ if "{target_language}:" in out_str :
137
+ out_str = out_str.split("{target_language}:")[0]
138
+ return out_str.strip()
139
+
140
+ if "\nHuman:" in out_str :
141
+ out_str = out_str.split("\n\nHuman:")[0].split("\nHuman:")[0]
142
+ return out_str.strip()
143
+
144
+ if "\nAssistant:" in out_str :
145
+ out_str = out_str.split("\n\nAssistant:")[0].split("\nAssistant:")[0]
146
+ return out_str.strip()
147
+
148
+ if "\n#" in out_str :
149
+ out_str = out_str.split("\n\n#")[0].split("\n#")[0]
150
+ return out_str.strip()
151
+
152
+ # Yield for streaming
153
+ yield out_str.strip()
154
 
155
  del out
156
  del state
 
160
  # if HAS_GPU == True :
161
  # torch.cuda.empty_cache()
162
 
163
+ # yield out_str.strip()
164
+ return out_str.strip()
165
 
166
  # Languages
167
  LANGUAGES = [
 
253
  "Arabic (Tunisian Spoken)",
254
  "Rwanda",
255
  "Zulu",
256
+ "Latin",
257
  "Bulgarian",
258
  "Swedish",
259
  "Lombard",
 
278
 
279
  # Example data
280
  EXAMPLES = [
281
+ # More people would learn from their mistakes if they weren't so busy denying them.
282
+ ["Többen tanulnának a hibáikból, ha nem lennének annyira elfoglalva, hogy tagadják azokat.", "Hungarian", "English"],
283
+ ["La mejor venganza es el éxito masivo.", "Spanish", "English"],
284
+ ["Tout est bien qui finit bien.", "French", "English"],
285
+ ["Lasciate ogne speranza, voi ch'intrate.", "Italian", "English"],
286
+ ["Errare humanum est.", "Latin", "English"],
 
 
 
 
 
287
  ]
 
288
  # Gradio interface
289
  with gr.Blocks(title=title) as demo:
290
  gr.HTML(f"<div style=\"text-align: center;\"><h1>RWKV-5 World v2 - {title}</h1></div>")
291
+ gr.Markdown("This is the RWKV-5 World v2 1B5 model tailored for translation tasks")
292
 
293
  # Input and output components
294
+ text = gr.Textbox(lines=5, label="Source Text", placeholder="Enter the text you want to translate...", value=EXAMPLES[0][0])
295
+ source_language = gr.Dropdown(choices=LANGUAGES, label="Source Language", value=EXAMPLES[0][1])
296
+ target_language = gr.Dropdown(choices=LANGUAGES, label="Target Language", value=EXAMPLES[0][2])
297
  output = gr.Textbox(lines=5, label="Translated Text")
298
+
299
+ # Submission
300
  submit = gr.Button("Translate", variant="primary")
301
 
302
  # Example data
303
+ data = gr.Dataset(components=[text, source_language, target_language], samples=EXAMPLES, label="Example Translations", headers=["Source Text", "Target Language"])
304
 
305
  # Button action
306
+ submit.click(translate, [text, source_language, target_language], [output])
307
+ data.click(lambda x: x, [data], [text, source_language, target_language])
308
 
309
  # Gradio launch
310
  demo.queue(concurrency_count=1, max_size=10)
311
+ demo.launch(share=False, debug=True)