import gradio as gr
import torch
import torch.nn.functional as F
import transformers
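
# A GLTR-style (Giant Language model Test Room) demo: highlight each token of
# the input text with the probability the model assigned to it, given the
# tokens that precede it.
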
def generate(model_name="Salesforce/codegen-350M-mono", text="World"): |
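    """Sample a completion of `text` from the model (not used by the Gradio
    interface below)."""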
    model = transformers.AutoModelForCausalLM.from_pretrained(model_name)
    tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)
    input_ids = tokenizer.encode(text, return_tensors='pt')
    output = model.generate(input_ids, max_length=100, do_sample=True)
    return tokenizer.decode(output[0])
|
|
|
def get_token_likelihoods(model_name="Salesforce/codegen-350M-mono", text="World"):
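    """Pair each token of `text` with the probability (as a percentage string)
    that the model assigned to it, in the format gr.HighlightedText accepts."""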
    model = transformers.AutoModelForCausalLM.from_pretrained(model_name)
    tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)
    input_ids = tokenizer.encode(text, return_tensors='pt')

    # A single forward pass; the logits at position i are the model's
    # distribution over what token i + 1 should be.
    with torch.no_grad():
        out = model(input_ids)
    probs = F.softmax(out.logits, dim=-1).squeeze(0)

    tokens = input_ids.squeeze(0)

    # The model never predicts the first token, so it carries no likelihood.
    output = [(tokenizer.decode(tokens[0]), None)]

    # Pair token i + 1 with the distribution computed at position i;
    # pairing token i with probs[i] would be off by one.
    for tok, dist in zip(tokens[1:], probs[:-1]):
        output.append((
            tokenizer.decode(tok),
            f"{dist[tok].item() * 100:.4f}%",
        ))

    return output
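
# Illustrative example (made-up numbers): for text="Hello world" the result
# might look like [("Hello", None), (" world", "0.0214%")].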
|
|
|
demo = gr.Interface(
    fn=get_token_likelihoods,
    title="Per-token likelihood GUI based on GLTR (Giant Language model Test Room)",
    inputs=[
        gr.Textbox(
            label="Model name",
            lines=1,
            value="Salesforce/codegen-350M-mono",
        ),
        gr.Textbox(
            label="Text",
            lines=3,
            value="def first_n_primes(n):\n    primes = []\n    i = 2\n    while len(primes) < n:\n        if is_prime(i):\n            primes.append(i)\n        i += 1\n    return",
        ),
    ],
    outputs=gr.HighlightedText(
        label="Token likelihoods",
        combine_adjacent=True,
    ),
)
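
# `demo.launch(share=True)` would additionally serve a temporary public link.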

if __name__ == "__main__":
    demo.launch()