Norod78 committed on
Commit
811b009
·
verified ·
1 Parent(s): bc4ac2d

Provide the previous prompt as "History"

Browse files
Files changed (1) hide show
  1. app.py +9 -4
app.py CHANGED
@@ -26,7 +26,6 @@ If you duplicate this space, make sure you have access to [meta-llama/Llama-2-7b
26
  because this model uses it as a tokenizer.
27
 
28
  # Note: Use this model only for completing sentences and instruction following.
29
- ## While the user interface is a chatbot for convenience, this is an instruction tuned model not fine-tuned for chatbot tasks. As such, the model is not provided a chat history and will complete your text based on the last given prompt only.
30
  """
31
 
32
  LICENSE = """
@@ -35,8 +34,6 @@ LICENSE = """
35
  ---
36
  As a derivative work of [OpenELM-3B-Instruct](https://huggingface.co/apple/OpenELM-3B-Instruct) by Apple,
37
  this demo is governed by the original [license](https://huggingface.co/apple/OpenELM-3B-Instruct/blob/main/LICENSE).
38
-
39
- This demo Space was created by [Doron Adler](https://linktr.ee/Norod78)
40
  """
41
 
42
  if not torch.cuda.is_available():
@@ -51,6 +48,7 @@ if torch.cuda.is_available():
51
  if tokenizer.pad_token == None:
52
  tokenizer.pad_token = tokenizer.eos_token
53
  tokenizer.pad_token_id = tokenizer.eos_token_id
 
54
 
55
  @spaces.GPU
56
  def generate(
@@ -63,6 +61,13 @@ def generate(
63
  repetition_penalty: float = 1.4,
64
  ) -> Iterator[str]:
65
 
 
 
 
 
 
 
 
66
  input_ids = tokenizer([message], return_tensors="pt").input_ids
67
  if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
68
  input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
@@ -82,7 +87,7 @@ def generate(
82
  pad_token_id = tokenizer.eos_token_id,
83
  repetition_penalty=repetition_penalty,
84
  no_repeat_ngram_size=5,
85
- early_stopping=True,
86
  )
87
  t = Thread(target=model.generate, kwargs=generate_kwargs)
88
  t.start()
 
26
  because this model uses it as a tokenizer.
27
 
28
  # Note: Use this model only for completing sentences and instruction following.
 
29
  """
30
 
31
  LICENSE = """
 
34
  ---
35
  As a derivative work of [OpenELM-3B-Instruct](https://huggingface.co/apple/OpenELM-3B-Instruct) by Apple,
36
  this demo is governed by the original [license](https://huggingface.co/apple/OpenELM-3B-Instruct/blob/main/LICENSE).
 
 
37
  """
38
 
39
  if not torch.cuda.is_available():
 
48
  if tokenizer.pad_token == None:
49
  tokenizer.pad_token = tokenizer.eos_token
50
  tokenizer.pad_token_id = tokenizer.eos_token_id
51
+ model.config.pad_token_id = tokenizer.eos_token_id
52
 
53
  @spaces.GPU
54
  def generate(
 
61
  repetition_penalty: float = 1.4,
62
  ) -> Iterator[str]:
63
 
64
+ historical_text = ""
65
+ # Prepend the entire chat history to the message, separating messages with newlines
66
+ for user, assistant in chat_history:
67
+ historical_text += f"\n{user}\n{assistant}"
68
+
69
+ if len(historical_text) > 0:
70
+ message = historical_text + f"\n{message}"
71
  input_ids = tokenizer([message], return_tensors="pt").input_ids
72
  if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
73
  input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
 
87
  pad_token_id = tokenizer.eos_token_id,
88
  repetition_penalty=repetition_penalty,
89
  no_repeat_ngram_size=5,
90
+ early_stopping=False,
91
  )
92
  t = Thread(target=model.generate, kwargs=generate_kwargs)
93
  t.start()