pszemraj committed on
Commit
af12f2c
1 Parent(s): 5209ab6
Files changed (1) hide show
  1. app.py +15 -2
app.py CHANGED
@@ -13,8 +13,8 @@ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, TextIteratorStrea
13
 
14
  model_id = "pszemraj/flan-t5-large-instruct-dolly_hhrlhf"
15
  torch_device = "cuda" if torch.cuda.is_available() else "cpu"
16
- logging.info("Running on device:", torch_device)
17
- logging.info("CPU threads:", torch.get_num_threads())
18
 
19
 
20
  if torch_device == "cuda":
@@ -23,6 +23,11 @@ if torch_device == "cuda":
23
  )
24
  else:
25
  model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
 
 
 
 
 
26
  tokenizer = AutoTokenizer.from_pretrained(model_id)
27
 
28
 
@@ -123,6 +128,14 @@ with gr.Blocks() as demo:
123
  interactive=True,
124
  label="Top-k",
125
  )
 
 
 
 
 
 
 
 
126
  repetition_penalty = gr.Slider(
127
  minimum=0.9,
128
  maximum=2.5,
 
13
 
14
  model_id = "pszemraj/flan-t5-large-instruct-dolly_hhrlhf"
15
  torch_device = "cuda" if torch.cuda.is_available() else "cpu"
16
+ logging.info(f"Running on device:\t {torch_device}")
17
+ logging.info(f"CPU threads:\t {torch.get_num_threads()}")
18
 
19
 
20
  if torch_device == "cuda":
 
23
  )
24
  else:
25
  model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
26
+ try:
27
+ model = torch.compile(model)
28
+ except Exception as e:
29
+ logging.error(f"Unable to compile model:\t{e}")
30
+
31
  tokenizer = AutoTokenizer.from_pretrained(model_id)
32
 
33
 
 
128
  interactive=True,
129
  label="Top-k",
130
  )
131
+ temperature = gr.Slider(
132
+ minimum=0.1,
133
+ maximum=5.0,
134
+ value=0.8,
135
+ step=0.1,
136
+ interactive=True,
137
+ label="Temperature",
138
+ )
139
  repetition_penalty = gr.Slider(
140
  minimum=0.9,
141
  maximum=2.5,