import gradio as gr
import numpy as np
import torch
from peft import PeftConfig, PeftModel, LoraConfig
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    StoppingCriteria,
    StoppingCriteriaList,
)

# Load the model in bfloat16 and move it to the GPU when one is available,
# so that the inputs (moved to the same device below) match the model's device.
device = "cuda" if torch.cuda.is_available() else "cpu"
ref_model = AutoModelForCausalLM.from_pretrained("w601sxs/b1ade-1b", torch_dtype=torch.bfloat16).to(device)
tokenizer = AutoTokenizer.from_pretrained("w601sxs/b1ade-1b")
ref_model.eval()
class KeywordsStoppingCriteria(StoppingCriteria):
    """Stop generation as soon as the most recently generated token is one of the keyword ids."""

    def __init__(self, keywords_ids: list):
        self.keywords = keywords_ids

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        if input_ids[0][-1] in self.keywords:
            return True
        return False


# Stop generation once a '>' token (in any of its common tokenizations) is produced.
stop_words = ['>', ' >', '> ']
stop_ids = [tokenizer.encode(w)[0] for w in stop_words]
stop_criteria = KeywordsStoppingCriteria(stop_ids)
# Fall back to the EOS token for padding if the tokenizer defines no pad token.
if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id
    ref_model.config.pad_token_id = ref_model.config.eos_token_id
# Define your color-coding labels; if prob >= x, then label = y. Must stay sorted in descending probability order!
probs_to_label = [
    (0.99, "99%"),
    (0.95, "95%"),
    (0.9, "90%"),
    (0.5, "50%"),
    (0.1, "10%"),
    (0.01, "1%"),
]
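# Each generated token gets the first bucket whose threshold its probability meets:
# e.g. p = 0.93 fails the 0.99 and 0.95 checks but satisfies 0.9, so it is labelled "90%";
# anything below 0.01 keeps label None and is left unhighlighted.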
def get_tokens_and_labels(prompt):
    """
    Given the prompt (text), return a list of (decoded_token, label) tuples,
    where the label encodes the probability bucket of each generated token.
    """
    inputs = tokenizer([prompt], return_tensors="pt").to(device)
    outputs = ref_model.generate(
        **inputs,
        max_new_tokens=1000,
        return_dict_in_generate=True,
        output_scores=True,
        stopping_criteria=StoppingCriteriaList([stop_criteria]),
    )

    # Important: don't forget to set `normalize_logits=True` to obtain normalized probabilities (i.e. sum(p) = 1)
    transition_scores = ref_model.compute_transition_scores(
        outputs.sequences, outputs.scores, normalize_logits=True
    )
    transition_proba = np.exp(transition_scores.double().cpu())

    # We only have scores for the generated tokens, so pop out the prompt tokens
    input_length = inputs.input_ids.shape[1]
    generated_ids = outputs.sequences[:, input_length:]
    generated_tokens = tokenizer.convert_ids_to_tokens(generated_ids[0])

    # Important: you might need to find a tokenization character to replace (e.g. "Ġ" for BPE) to get the
    # correct spacing into the final output 👼
    if ref_model.config.is_encoder_decoder:
        highlighted_out = []
    else:
        input_tokens = tokenizer.convert_ids_to_tokens(inputs.input_ids[0])
        highlighted_out = [(token.replace("▁", " "), None) for token in input_tokens]

    # Get the (decoded_token, label) pairs for the generated tokens
    for token, proba in zip(generated_tokens, transition_proba[0]):
        this_label = None
        assert 0.0 <= proba <= 1.0
        for min_proba, label in probs_to_label:
            if proba >= min_proba:
                this_label = label
                break
        highlighted_out.append((token.replace("▁", " "), this_label))

    return highlighted_out
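# Illustrative output shape (tokens and labels below are made up, not a real model run):
#   [("context", None), (":", None), ..., ("Ġblue", "95%"), ("Ġ>", "50%")]
# Prompt tokens carry label None; generated tokens carry the bucket of their transition probability.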
import spacy
from spacy import displacy
from spacy.tokens import Doc, Span
def render_output(prompt):
    output = get_tokens_and_labels(prompt)
    nlp = spacy.blank("en")

    # Build a Doc whose tokens are exactly the model's tokens (with BPE markers mapped back to
    # spaces/newlines), then attach one span per labelled token carrying its probability bucket.
    words = [a[0].replace('Ġ', ' ').replace('Ċ', '\n') for a in output]
    doc = Doc(nlp.vocab, words=words)
    doc.spans["sc"] = []
    for i, (token, label) in enumerate(output):
        if label is not None:
            doc.spans["sc"].append(Span(doc, i, i + 1, label))

    options = {
        'colors': {
            '99%': '#44ce1b',
            '95%': '#bbdb44',
            '90%': '#f7e379',
            '50%': '#fec12a',
            '10%': '#f2a134',
            '1%': '#e51f1f',
            '': '#e51f1f',
        }
    }

    # displacy returns an HTML fragment, which Gradio can render directly.
    return displacy.render(doc, style="span", options=options)
def predict(text):
    # Plain text generation without the probability highlighting (kept for reference; not wired to the UI below).
    inputs = tokenizer(text, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = ref_model.generate(input_ids=inputs["input_ids"], max_new_tokens=128)
    out_text = tokenizer.batch_decode(outputs.detach().cpu().numpy(), skip_special_tokens=True)[0].split("answer:")[-1]
    return out_text.split(text)[-1]
demo = gr.Interface(
    fn=render_output,
    inputs='text',
    outputs='html',  # render_output returns displaCy HTML markup, so use an HTML output component
)

demo.launch()
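# A minimal local sanity check could look like the following (commented out because
# demo.launch() blocks; the prompt below is only an assumed example, not the documented
# b1ade-1b prompt format):
#
#   html = render_output("context: The sky is blue. question: what color is the sky? <")
#   print(html[:200])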