Tonic committed
Commit 3d8d7f7
1 Parent(s): 0292591

Update app.py

Files changed (1):
  app.py +26 -6
app.py CHANGED
@@ -1,6 +1,6 @@
 import spaces
 import gradio as gr
-from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
+from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModelForSequenceClassification, BitsAndBytesConfig
 import torch
 from gradio_rich_textbox import RichTextbox
 
@@ -16,15 +16,35 @@ model_path = "NousResearch/Genstruct-7B"
 tokenizer = AutoTokenizer.from_pretrained(model_path)
 quantization_config = BitsAndBytesConfig(load_in_8bit=True)
 model = AutoModelForCausalLM.from_pretrained(model_path, device_map='cuda', quantization_config=quantization_config)
+rm_tokenizer = AutoTokenizer.from_pretrained('OpenAssistant/reward-model-deberta-v3-large-v2')
+rm_model = AutoModelForSequenceClassification.from_pretrained('OpenAssistant/reward-model-deberta-v3-large-v2', torch_dtype=torch.bfloat16)
 
 @spaces.GPU
-def generate_text(usertitle, content, max_length, temperature):
-    input_text = {'title': usertitle, 'content': content}
+def generate_text(usertitle, content, max_length, temperature, N=3):
+    input_text = {'title': usertitle, 'content': content}
     inputs = tokenizer.apply_chat_template(input_text, return_tensors='pt').cuda()
-    generated_text = tokenizer.decode(model.generate(inputs, max_new_tokens=max_length, temperature=temperature, do_sample=True)[0]).strip().split(tokenizer.eos_token)[0]
-    # split_text = generated_text.split(tokenizer.eos_token)[0]
 
-    return generated_text
+    generated_sequences = model.generate(inputs, max_new_tokens=max_length, temperature=temperature, num_return_sequences=N, do_sample=True)
+    decoded_sequences = tokenizer.batch_decode(generated_sequences, skip_special_tokens=True)
+
+    def extract_pair(resp):
+        try:
+            response = resp.split('[[[Content]]]')[1]
+            inst, resp = response.split('[[[User]]]')[:2]
+            return inst.strip(), resp.strip()
+        except ValueError:
+            return "", ""
+
+    def score(resp):
+        inst, resp = extract_pair(resp)
+        with torch.no_grad():
+            inputs = rm_tokenizer(inst, resp, return_tensors='pt', truncation=True, max_length=512)
+            logits = rm_model(**inputs).logits
+            score = float(logits[0].cpu())
+        return score
+
+    best_sequence = max(decoded_sequences, key=score)
+
+    return best_sequence
 
 def gradio_app():
     with gr.Blocks() as demo:
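
For reference, the new logic is a best-of-N sampling loop: Genstruct-7B drafts N candidate instruction/response pairs, the OpenAssistant reward model scores each pair, and the highest-scoring candidate is returned. Below is a minimal standalone sketch of that pattern, not the commit's exact code: the `[[[User]]]` marker comes from the commit itself, the `[[[Assistant]]]` marker is assumed from the Genstruct prompt format, and the helper names (`pick_best`, `reward`) and defaults are illustrative.

```python
# Sketch of best-of-N generation re-ranked by a reward model.
# Model names match the commit; helper names and defaults are assumptions.
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoModelForSequenceClassification,
    AutoTokenizer,
)

gen_tok = AutoTokenizer.from_pretrained('NousResearch/Genstruct-7B')
gen_model = AutoModelForCausalLM.from_pretrained('NousResearch/Genstruct-7B', device_map='cuda')
rm_tok = AutoTokenizer.from_pretrained('OpenAssistant/reward-model-deberta-v3-large-v2')
rm_model = AutoModelForSequenceClassification.from_pretrained('OpenAssistant/reward-model-deberta-v3-large-v2')

def pick_best(title, content, n=3, max_new_tokens=512):
    # The Genstruct chat template takes a list with one {'title', 'content'} message.
    msg = [{'title': title, 'content': content}]
    inputs = gen_tok.apply_chat_template(msg, return_tensors='pt').to(gen_model.device)
    outs = gen_model.generate(inputs, max_new_tokens=max_new_tokens,
                              do_sample=True, num_return_sequences=n)
    candidates = gen_tok.batch_decode(outs, skip_special_tokens=True)

    def reward(text):
        # Genstruct emits the instruction after '[[[User]]]' and the answer
        # after '[[[Assistant]]]'; malformed candidates rank last.
        try:
            tail = text.split('[[[User]]]', 1)[1]
            inst, resp = tail.split('[[[Assistant]]]', 1)
        except (IndexError, ValueError):
            return float('-inf')
        with torch.no_grad():
            batch = rm_tok(inst.strip(), resp.strip(), return_tensors='pt',
                           truncation=True, max_length=512)
            return rm_model(**batch).logits[0].item()

    return max(candidates, key=reward)
```

Scoring the extracted (instruction, response) pair rather than the raw decoded string keeps the input within the reward model's 512-token limit and matches the question/answer format that model expects.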