File size: 3,065 Bytes
2e605bf 7198503 89e8857 2e605bf 7198503 2e605bf 89e8857 2e605bf 7198503 89e8857 7198503 2e605bf 89e8857 2e605bf 7198503 89e8857 7198503 89e8857 2e605bf 7198503 2e605bf 89e8857 7198503 2e605bf 89e8857 2e605bf 7198503 2e605bf 7198503 2e605bf 7198503 89e8857 2e605bf 7198503 2e605bf |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 |
import logging
import pathlib
import gradio as gr
import pandas as pd
from gt4sd.algorithms.generation.hugging_face import (
HuggingFaceSeq2SeqGenerator,
HuggingFaceGenerationAlgorithm,
)
from transformers import AutoTokenizer
logger = logging.getLogger(__name__)
logger.addHandler(logging.NullHandler())
task2prefix = {
"forward": "Predict the product of the following reaction: ",
"retrosynthesis": "Predict the reaction that produces the following product: ",
"paragraph to actions": "Which actions are described in the following paragraph: ",
"molecular captioning": "Caption the following SMILES: ",
"text-conditional de novo generation": "Write in SMILES the described molecule: ",
}
def run_inference(
model_name_or_path: str,
task: str,
prompt: str,
num_beams: int,
):
instruction = task2prefix[task]
config = HuggingFaceSeq2SeqGenerator(
algorithm_version=model_name_or_path,
prefix=instruction,
prompt=prompt,
num_beams=num_beams,
)
model = HuggingFaceGenerationAlgorithm(config)
tokenizer = AutoTokenizer.from_pretrained("t5-small")
text = list(model.sample(1))[0]
text = text.replace(instruction + prompt, "")
text = text.split(tokenizer.eos_token)[0]
text = text.replace(tokenizer.pad_token, "")
text = text.strip()
return text
if __name__ == "__main__":
models = [
"text-chem-t5-small-standard",
"text-chem-t5-small-augm",
"text-chem-t5-base-standard",
"text-chem-t5-base-augm",
]
metadata_root = pathlib.Path(__file__).parent.joinpath("model_cards")
examples = pd.read_csv(metadata_root.joinpath("examples.csv"), header=None).fillna(
""
)
print("Examples: ", examples.values.tolist())
with open(metadata_root.joinpath("article.md"), "r") as f:
article = f.read()
with open(metadata_root.joinpath("description.md"), "r") as f:
description = f.read()
demo = gr.Interface(
fn=run_inference,
title="Text-chem-T5 model",
inputs=[
gr.Dropdown(
models,
label="Language model",
value="text-chem-t5-base-augm",
),
gr.Radio(
choices=[
"forward",
"retrosynthesis",
"paragraph to actions",
"molecular captioning",
"text-conditional de novo generation",
],
label="Task",
value="paragraph to actions",
),
gr.Textbox(
label="Text prompt",
placeholder="I'm a stochastic parrot.",
lines=1,
),
gr.Slider(minimum=1, maximum=50, value=10, label="num_beams", step=1),
],
outputs=gr.Textbox(label="Output"),
article=article,
description=description,
examples=examples.values.tolist(),
)
demo.launch(debug=True, show_error=True)
|