awinml committed on
Commit
d5f97c8
1 Parent(s): ea9e5f0

Update app.py

Files changed (1)
app.py +3 -10
app.py CHANGED
@@ -7,7 +7,7 @@ import spaces
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 
-MAX_MAX_NEW_TOKENS = 2048
+MAX_MAX_NEW_TOKENS = 4096
 DEFAULT_MAX_NEW_TOKENS = 1024
 MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 
@@ -15,19 +15,12 @@ DESCRIPTION = """\
 # Gemma 2B on GPU
 """
 
-LICENSE = """
-<p/>
----
-As a derivate work of [Llama-2-13b-chat](https://huggingface.co/meta-llama/Llama-2-13b-chat) by Meta,
-this demo is governed by the original [license](https://huggingface.co/spaces/huggingface-projects/llama-2-13b-chat/blob/main/LICENSE.txt) and [acceptable use policy](https://huggingface.co/spaces/huggingface-projects/llama-2-13b-chat/blob/main/USE_POLICY.md).
-"""
-
 if not torch.cuda.is_available():
     DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"
 
 
 if torch.cuda.is_available():
-    model_id = "teknium/OpenHermes-2.5-Mistral-7B"
+    model_id = "Nexusflow/Starling-LM-7B-beta"
     model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", load_in_4bit=True)
     tokenizer = AutoTokenizer.from_pretrained(model_id)
     tokenizer.use_default_system_prompt = False
@@ -39,7 +32,7 @@ def generate(
     chat_history: list[tuple[str, str]],
     system_prompt: str,
     max_new_tokens: int = 1024,
-    temperature: float = 0.6,
+    temperature: float = 0.5,
     top_p: float = 0.9,
     top_k: int = 50,
     repetition_penalty: float = 1.2,
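For reference, here is a minimal sketch of how the values touched by this commit (the larger `MAX_MAX_NEW_TOKENS` cap, the `Nexusflow/Starling-LM-7B-beta` `model_id`, and the lower `temperature` default) typically feed a streaming generate call in a transformers demo. Only the diffed lines above come from the actual app.py; the `generate` helper below and its exact wiring are assumptions for illustration.

```python
# Sketch only: assumes app.py wires the changed values roughly like this.
from threading import Thread

from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

MAX_MAX_NEW_TOKENS = 4096
DEFAULT_MAX_NEW_TOKENS = 1024
MAX_INPUT_TOKEN_LENGTH = 4096

model_id = "Nexusflow/Starling-LM-7B-beta"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", load_in_4bit=True)


def generate(message: str,
             max_new_tokens: int = DEFAULT_MAX_NEW_TOKENS,
             temperature: float = 0.5):
    # Tokenize and truncate the prompt to the configured input budget.
    input_ids = tokenizer(message, return_tensors="pt").input_ids
    if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
        input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
    input_ids = input_ids.to(model.device)

    # Stream tokens back as they are generated, capping the request at the new maximum.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = dict(
        input_ids=input_ids,
        streamer=streamer,
        max_new_tokens=min(max_new_tokens, MAX_MAX_NEW_TOKENS),
        do_sample=True,
        temperature=temperature,
        top_p=0.9,
        top_k=50,
        repetition_penalty=1.2,
    )
    Thread(target=model.generate, kwargs=generate_kwargs).start()
    for text in streamer:
        yield text
```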