jed-tiotuico commited on
Commit
34c50f2
1 Parent(s): 49fb5e7

added printer-generated question

Browse files
Files changed (2) hide show
  1. app.py +72 -0
  2. seed_tasks.jsonl +0 -0
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import datetime
2
  from google.protobuf import message
3
  import torch
 
4
  import time
5
  import threading
6
  import streamlit as st
@@ -23,6 +24,55 @@ if device == "cpu":
23
  # check if mps is available
24
  device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  def get_model_tokenizer(sota_model_name):
27
  model, tokenizer = FastLanguageModel.from_pretrained(
28
  model_name = "jed-tiotuico/twitter-llama",
@@ -293,6 +343,28 @@ if st.button("your website is straight up garbage. how do you sell high end tech
293
  )
294
  )
295
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
296
  # - Generate Customer Tweet
297
  if st.button("Generate Customer Message using Few Shots"):
298
  model, tokenizer = get_mistral_model_tokenizer(sota_model_name)
 
1
  import datetime
2
  from google.protobuf import message
3
  import torch
4
+ import json
5
  import time
6
  import threading
7
  import streamlit as st
 
24
  # check if mps is available
25
  device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
26
 
27
# Pool of HP printer model names used to fill the "<|hp-printer|>"
# placeholder in seed instructions when building few-shot prompts
# (see generate_printer_prompt, which picks one via random.choice).
printer_models = [
    "HP Smart Tank 750",
    "HP LaserJet Pro",
    "HP LaserJet 4100",
    "HP LaserJet 4000",
    "HP Photosmart C4635",
    "HP OfficeJet Pro 9015",
    "HP Envy 6055",
    "HP DeskJet 3755",
    "HP Color LaserJet MFP M283fdw",
    "HP DesignJet T630",
    "HP PageWide Pro 477dw",
    "HP LaserJet Enterprise M506",
    "HP OfficeJet 5255",
    "HP Envy Photo 7855",
    "HP LaserJet Pro M404dn",
    "HP DeskJet Plus 4155",
    "HP LaserJet Enterprise MFP M528f",
    "HP Neverstop Laser 1001nw",
    "HP Tango X",
    "HP Color LaserJet Pro M255dw",
    "HP Smart Tank Plus 651",
    "HP LaserJet Pro MFP M428fdw",
    "HP OfficeJet Pro 8035",
    "HP Envy 6075",
    "HP DeskJet 2622",
    "HP LaserJet Pro M15w"
]
55
+
56
def generate_printer_prompt(prompt_instructions, models=None):
    """Encode multiple prompt instructions into a single few-shot prompt string.

    Each instruction is whitespace-normalized, stripped of a trailing colon,
    and has the literal "<|hp-printer|>" placeholder replaced by a randomly
    chosen printer model, then appended as a "Q: ..." example.

    Args:
        prompt_instructions: iterable of seed task/question strings; each may
            contain the "<|hp-printer|>" placeholder.
        models: optional sequence of printer model names to sample from;
            defaults to the module-level ``printer_models`` list.

    Returns:
        The assembled prompt string, ending with "Q: (your task/question)".
    """
    model_pool = printer_models if models is None else models
    prompt = """
Come up with a printer related task or question that a person might ask for support.
no further text/explanation, no additional information.
Ensure the tasks/questions should follow the same style and complexity
Examples:
"""
    for instruction in prompt_instructions:
        # Collapse runs of whitespace and drop a trailing colon so every
        # example reads as a single clean question line.
        instruction = re.sub(r"\s+", " ", instruction).strip().rstrip(":")
        # Pick one random printer model for the placeholder. Use str.replace
        # instead of re.sub: the placeholder is a literal, and re.sub would
        # interpret backslashes in the replacement string.
        printer_model = random.choice(model_pool)
        instruction = instruction.replace("<|hp-printer|>", printer_model)
        prompt += f"Q: {instruction}\n\n"
    prompt += "Now it's your turn, come up with a printer task/question that a person might ask for support.\n"
    prompt += "Q: (your task/question)"
    return prompt
75
+
76
  def get_model_tokenizer(sota_model_name):
77
  model, tokenizer = FastLanguageModel.from_pretrained(
78
  model_name = "jed-tiotuico/twitter-llama",
 
343
  )
344
  )
345
 
346
if st.button("Generate printer task/question"):
    # Load the seed tasks; use a context manager so the file handle is
    # closed promptly (the previous version leaked the open file).
    with open("printer-seed.jsonl", "r") as seed_file:
        seed_tasks = [json.loads(line) for line in seed_file]
    seed_instructions = [task["text"] for task in seed_tasks]
    # Sample a few seed instructions and shuffle their order for the prompt.
    # (Equivalent to the old "[] then += sample(..., n - 0)" two-step.)
    prompt_instructions = random.sample(seed_instructions, num_prompt_instructions)
    random.shuffle(prompt_instructions)
    customer_msg = generate_printer_prompt(prompt_instructions)
    # Stash the prompt so later reruns can reuse it, and echo it to the chat.
    st.session_state["user_msg_as_prompt"] = customer_msg
    write_user_chat_message(user_chat, customer_msg)
    model, tokenizer = get_model_tokenizer(sota_model_name)
    input_text = alpaca_input_text_format.format(customer_msg)
    st.write(f"```\n{input_text}```")
    # Stream the model's generated task/question into the assistant pane.
    assistant_chat.write_stream(
        stream_generation(
            input_text,
            show_prompt=False,
            tokenizer=tokenizer,
            model=model,
            temperature=0.5,
        )
    )
367
+
368
  # - Generate Customer Tweet
369
  if st.button("Generate Customer Message using Few Shots"):
370
  model, tokenizer = get_mistral_model_tokenizer(sota_model_name)
seed_tasks.jsonl ADDED
The diff for this file is too large to render. See raw diff