import gradio as gr
import torch
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer

# Run on GPU when available, otherwise fall back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the base model with the fine-tuned PEFT adapters applied.
model = AutoPeftModelForCausalLM.from_pretrained(
    "Someman/bloomz-560m-fine-tuned-adapters_v2.0"
).to(device)
tokenizer = AutoTokenizer.from_pretrained("bigscience/bloom-560m")

SUMMARIZATION = "Summarization"
TITLE_GENERATION = "Title Generation"


def generate_output(prompt: str, document: str, kwargs: dict) -> str:
    """Append the task prompt to the document, generate, and return the
    first line of the newly generated text."""
    text = document + prompt
    inputs = tokenizer(text, return_tensors="pt").to(device)
    with torch.no_grad():
        output_ids = model.generate(**inputs, **kwargs)
    # Slice off the prompt tokens so only newly generated text is decoded.
    output = tokenizer.batch_decode(
        output_ids[:, inputs.input_ids.shape[1]:], skip_special_tokens=True
    )
    return output[0].split("\n")[0].strip()


def summarization(document: str) -> str:
    # The literal "\n" (backslash-n) is kept as-is; it appears to be part of
    # the prompt template the adapters were fine-tuned with.
    prompt = " \\nSummary in the same language as the doc:"
    kwargs = {"max_new_tokens": 50}
    return generate_output(prompt, document, kwargs)


def title_generation(document: str) -> str:
    prompt = "\\n\\nGive me a good title for the article above."
    kwargs = {"max_new_tokens": 50}
    return generate_output(prompt, document, kwargs)


def generate(task: str, document: str) -> str:
    """Dispatch the selected task; reject inputs too short to be meaningful."""
    if len(document) > 20:
        if task == SUMMARIZATION:
            return summarization(document)
        elif task == TITLE_GENERATION:
            return title_generation(document)
        else:
            return "Wow! Very Dangerous!"
    return "Enter something meaningful."


demo = gr.Interface(
    fn=generate,
    inputs=[
        gr.Dropdown(
            [SUMMARIZATION, TITLE_GENERATION],
            label="Task",
            info="Will add more tasks later!",
        ),
        gr.TextArea(),
    ],
    outputs="text",
)

demo.launch()