christofid committed on
Commit
89e8857
1 Parent(s): dc87e4f

Update app.py

Files changed (1)
  1. app.py +31 -12
app.py CHANGED
@@ -4,25 +4,35 @@ import gradio as gr
 import pandas as pd
 from gt4sd.algorithms.generation.hugging_face import (
     HuggingFaceSeq2SeqGenerator,
-    HuggingFaceGenerationAlgorithm
+    HuggingFaceGenerationAlgorithm,
 )
 from transformers import AutoTokenizer
 
 logger = logging.getLogger(__name__)
 logger.addHandler(logging.NullHandler())
 
+task2prefix = {
+    "forward": "Predict the product of the following reaction: ",
+    "retrosynthesis": "Predict the reaction that produces the following product: ",
+    "paragraph to actions": "Which actions are described in the following paragraph: ",
+    "molecular captioning": "Caption the following SMILES: ",
+    "text-conditional de novo generation": "Write in SMILES the described molecule: ",
+}
+
+
 def run_inference(
     model_name_or_path: str,
-    prefix: str,
+    task: str,
     prompt: str,
     num_beams: int,
 ):
+    instruction = task2prefix[task]
 
     config = HuggingFaceSeq2SeqGenerator(
         algorithm_version=model_name_or_path,
-        prefix=prefix,
+        prefix=instruction,
         prompt=prompt,
-        num_beams=num_beams
+        num_beams=num_beams,
     )
 
     model = HuggingFaceGenerationAlgorithm(config)
@@ -30,22 +40,23 @@ def run_inference(
 
     text = list(model.sample(1))[0]
 
-    text = text.replace(prefix+prompt,"")
+    text = text.replace(instruction + prompt, "")
     text = text.split(tokenizer.eos_token)[0]
     text = text.replace(tokenizer.pad_token, "")
     text = text.strip()
 
-
     return text
 
 
 if __name__ == "__main__":
 
-    # Preparation (retrieve all available algorithms)
-    models = ["text-chem-t5-small-standard", "text-chem-t5-small-augm",
-              "text-chem-t5-base-standard", "text-chem-t5-base-augm"]
+    models = [
+        "text-chem-t5-small-standard",
+        "text-chem-t5-small-augm",
+        "text-chem-t5-base-standard",
+        "text-chem-t5-base-augm",
+    ]
 
-    # Load metadata
     metadata_root = pathlib.Path(__file__).parent.joinpath("model_cards")
 
     examples = pd.read_csv(metadata_root.joinpath("examples.csv"), header=None).fillna(
@@ -67,8 +78,16 @@ if __name__ == "__main__":
             label="Language model",
             value="text-chem-t5-base-augm",
         ),
-        gr.Textbox(
-            label="Prefix", placeholder="A task-specific prefix", lines=1
+        gr.Radio(
+            choices=[
+                "forward",
+                "retrosynthesis",
+                "paragraph to actions",
+                "molecular captioning",
+                "text-conditional de novo generation",
+            ],
+            label="Task",
+            value="paragraph to actions",
         ),
         gr.Textbox(
             label="Text prompt",