# NOTE: the lines below appear to be residue from a web file viewer, kept as comments:
# christofid's picture
# Add t5-base variants
# db8b533
# raw
# history blame
# 3.17 kB
import functools
import logging
import pathlib

import gradio as gr
import pandas as pd
from gt4sd.algorithms.generation.hugging_face import (
    HuggingFaceSeq2SeqGenerator,
    HuggingFaceGenerationAlgorithm,
)
from transformers import AutoTokenizer
# Module-level logger. Only a NullHandler is attached here, so this module
# installs no output handler of its own.
logger = logging.getLogger(__name__)
logger.addHandler(logging.NullHandler())

# Maps each supported task name to the instruction prefix that is passed to
# the generator together with the user's prompt.
task2prefix = {
    "forward": "Predict the product of the following reaction: ",
    "retrosynthesis": "Predict the reaction that produces the following product: ",
    "paragraph to actions": "Which actions are described in the following paragraph: ",
    "molecular captioning": "Caption the following smile: ",
    "text-conditional de novo generation": "Write in SMILES the described molecule: ",
}
# Tokenizer whose special tokens (EOS / PAD) are used to clean generated text.
_TOKENIZER_NAME = "t5-small"


@functools.lru_cache(maxsize=1)
def _load_tokenizer(name: str):
    """Load the tokenizer called *name* once and reuse it on later calls."""
    return AutoTokenizer.from_pretrained(name)


def run_inference(
    model_name_or_path: str,
    task: str,
    prompt: str,
    num_beams: int,
) -> str:
    """Generate one sample for *prompt* and return the cleaned output text.

    Args:
        model_name_or_path: Passed to the generator configuration as
            ``algorithm_version``.
        task: Key of ``task2prefix`` selecting the instruction prefix.
        prompt: User-supplied text passed to the generator as ``prompt``.
        num_beams: Passed to the generator configuration as ``num_beams``.

    Returns:
        The first generated sample with the echoed ``instruction + prompt``
        removed, truncated at the first EOS token, with pad tokens removed
        and surrounding whitespace stripped.

    Raises:
        KeyError: If *task* is not a key of ``task2prefix``.
        IndexError: If the sampler yields no item.
    """
    instruction = task2prefix[task]

    config = HuggingFaceSeq2SeqGenerator(
        algorithm_version=model_name_or_path,
        prefix=instruction,
        prompt=prompt,
        num_beams=num_beams,
    )
    model = HuggingFaceGenerationAlgorithm(config)

    # Only the tokenizer's special-token strings are needed, so the tokenizer
    # is loaded once and cached instead of being reloaded on every request.
    tokenizer = _load_tokenizer(_TOKENIZER_NAME)

    text = list(model.sample(1))[0]

    # Remove the echoed input, then keep only what precedes the first EOS
    # token and discard any pad tokens.
    text = text.replace(instruction + prompt, "")
    text = text.split(tokenizer.eos_token)[0]
    text = text.replace(tokenizer.pad_token, "")
    return text.strip()
if __name__ == "__main__":
    # Model versions offered in the "Language model" dropdown.
    models = [
        "multitask-text-and-chemistry-t5-small-standard",
        "multitask-text-and-chemistry-t5-small-augm",
        "multitask-text-and-chemistry-t5-base-standard",
        "multitask-text-and-chemistry-t5-base-augm",
    ]

    # Static UI assets live in the "model_cards" directory next to this file.
    metadata_root = pathlib.Path(__file__).parent.joinpath("model_cards")

    # header=None: every CSV row is read as data; missing cells become "".
    examples = pd.read_csv(
        metadata_root.joinpath("examples.csv"), header=None
    ).fillna("")
    example_rows = examples.values.tolist()
    print("Examples: ", example_rows)

    # Read the markdown with an explicit encoding so the result does not
    # depend on the platform's default locale encoding.
    article = metadata_root.joinpath("article.md").read_text(encoding="utf-8")
    description = metadata_root.joinpath("description.md").read_text(
        encoding="utf-8"
    )

    demo = gr.Interface(
        fn=run_inference,
        title="Multitask Text and Chemistry T5",
        inputs=[
            gr.Dropdown(
                models,
                label="Language model",
                value="multitask-text-and-chemistry-t5-small-augm",
            ),
            gr.Radio(
                # Derived from task2prefix so the offered tasks always match
                # the prefixes run_inference can look up.
                choices=list(task2prefix),
                label="Task",
                value="paragraph to actions",
            ),
            gr.Textbox(
                label="Text prompt",
                placeholder="I'm a stochastic parrot.",
                lines=1,
            ),
            gr.Slider(minimum=1, maximum=50, value=10, label="num_beams", step=1),
        ],
        outputs=gr.Textbox(label="Output"),
        article=article,
        description=description,
        examples=example_rows,
    )
    demo.launch(debug=True, show_error=True)