|
import functools
import logging
import pathlib

import gradio as gr
import pandas as pd
from gt4sd.algorithms.generation.hugging_face import (
    HuggingFaceSeq2SeqGenerator,
    HuggingFaceGenerationAlgorithm,
)
from transformers import AutoTokenizer
|
|
|
# Module-level logger named after this module. A NullHandler is attached so
# that, when the host application has not configured logging, records from
# this module are not routed to logging's last-resort stderr handler.
logger = logging.getLogger(__name__)
logger.addHandler(logging.NullHandler())
|
|
|
# Maps each task name to the instruction text that is passed as the
# generation prefix for that task. The strings are runtime prompts and must
# be kept verbatim, trailing space included.
task2prefix = {
    # Reaction in, product out.
    "forward": "Predict the product of the following reaction: ",
    # Product in, reaction out.
    "retrosynthesis": "Predict the reaction that produces the following product: ",
    # Free-text paragraph in, described actions out.
    "paragraph to actions": "Which actions are described in the following paragraph: ",
    # Molecule in, textual caption out.
    "molecular captioning": "Caption the following smile: ",
    # Textual description in, SMILES out.
    "text-conditional de novo generation": "Write in SMILES the described molecule: ",
}
|
|
|
|
|
@functools.lru_cache(maxsize=1)
def _load_special_token_tokenizer():
    """Load the ``t5-small`` tokenizer once and reuse it for later calls.

    ``run_inference`` only reads ``eos_token`` and ``pad_token`` from it, and
    the checkpoint name is constant, so reloading it per request is wasted work.
    """
    return AutoTokenizer.from_pretrained("t5-small")


def run_inference(
    model_name_or_path: str,
    task: str,
    prompt: str,
    num_beams: int,
) -> str:
    """Generate one output for ``prompt`` under the given task and clean it up.

    Args:
        model_name_or_path: Passed to the generator configuration as
            ``algorithm_version``.
        task: Key into ``task2prefix`` selecting the instruction prefix.
        prompt: User text that follows the instruction prefix.
        num_beams: Passed to the generator configuration as ``num_beams``.

    Returns:
        The first sampled text with the echoed ``instruction + prompt``
        removed, truncated at the tokenizer's EOS token, stripped of pad
        tokens and of surrounding whitespace.

    Raises:
        KeyError: If ``task`` is not a key of ``task2prefix``.
        IndexError: If the model yields no sample.
    """
    instruction = task2prefix[task]

    config = HuggingFaceSeq2SeqGenerator(
        algorithm_version=model_name_or_path,
        prefix=instruction,
        prompt=prompt,
        num_beams=num_beams,
    )
    model = HuggingFaceGenerationAlgorithm(config)

    # Cached: only the special-token strings are needed for post-processing.
    tokenizer = _load_special_token_tokenizer()

    # A single sample is requested; keep the first (only) item.
    text = list(model.sample(1))[0]

    # Drop the input if the model echoed it, then cut everything from the
    # first EOS marker onwards and remove any padding tokens left over.
    text = text.replace(instruction + prompt, "")
    text = text.split(tokenizer.eos_token)[0]
    text = text.replace(tokenizer.pad_token, "")
    text = text.strip()

    return text
|
|
|
|
|
if __name__ == "__main__":
    # Checkpoints offered in the "Language model" dropdown.
    models = [
        "multitask-text-and-chemistry-t5-small-standard",
        "multitask-text-and-chemistry-t5-small-augm",
        "multitask-text-and-chemistry-t5-base-standard",
        "multitask-text-and-chemistry-t5-base-augm",
    ]

    # Static assets for the UI live in "model_cards" next to this script.
    metadata_root = pathlib.Path(__file__).parent.joinpath("model_cards")

    # header=None: every CSV row is an example; missing cells become "".
    examples = pd.read_csv(metadata_root.joinpath("examples.csv"), header=None).fillna(
        ""
    )
    print("Examples: ", examples.values.tolist())

    # Read the markdown with an explicit encoding so decoding does not depend
    # on the platform's locale default.
    # NOTE(review): assumes the .md files are UTF-8 — confirm.
    article = metadata_root.joinpath("article.md").read_text(encoding="utf-8")
    description = metadata_root.joinpath("description.md").read_text(encoding="utf-8")

    demo = gr.Interface(
        fn=run_inference,
        title="Multitask Text and Chemistry T5",
        # Input order matches run_inference's positional parameters:
        # model_name_or_path, task, prompt, num_beams.
        inputs=[
            gr.Dropdown(
                models,
                label="Language model",
                value="multitask-text-and-chemistry-t5-small-augm",
            ),
            gr.Radio(
                # Derived from task2prefix so every selectable task has a
                # prefix and the two lists cannot drift apart.
                choices=list(task2prefix),
                label="Task",
                value="paragraph to actions",
            ),
            gr.Textbox(
                label="Text prompt",
                placeholder="I'm a stochastic parrot.",
                lines=1,
            ),
            gr.Slider(minimum=1, maximum=50, value=10, label="num_beams", step=1),
        ],
        outputs=gr.Textbox(label="Output"),
        article=article,
        description=description,
        examples=examples.values.tolist(),
    )
    demo.launch(debug=True, show_error=True)
|
|