Update src/prediction.py
Browse files- src/prediction.py +29 -13
src/prediction.py
CHANGED
@@ -22,6 +22,8 @@ generation_kwargs = {
|
|
22 |
"early_stopping": True,
|
23 |
"num_beams": 5,
|
24 |
"length_penalty": 1.5,
|
|
|
|
|
25 |
}
|
26 |
|
27 |
special_tokens = tokenizer.all_special_tokens
|
@@ -50,7 +52,7 @@ def target_postprocessing(texts, special_tokens):
|
|
50 |
|
51 |
return new_texts
|
52 |
|
53 |
-
def generation_function(texts):
|
54 |
_inputs = texts if isinstance(texts, list) else [texts]
|
55 |
inputs = [prefix + inp for inp in _inputs]
|
56 |
inputs = tokenizer(
|
@@ -58,23 +60,37 @@ def generation_function(texts):
|
|
58 |
max_length=256,
|
59 |
padding="max_length",
|
60 |
truncation=True,
|
61 |
-
return_tensors=
|
62 |
)
|
63 |
|
64 |
input_ids = inputs.input_ids
|
65 |
attention_mask = inputs.attention_mask
|
66 |
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
78 |
|
79 |
|
80 |
items = [
|
|
|
22 |
# Decoding settings shared by every model.generate() call.
generation_kwargs = {
    "early_stopping": True,
    "num_beams": 5,
    "length_penalty": 1.5,
    "num_return_sequences": 3,  # emit 3 candidate sequences per generate() call
    # NOTE(review): temperature only influences generation when sampling is
    # enabled; with pure beam search it is silently ignored and every call to
    # model.generate() returns the same output, which starves callers that
    # loop waiting for *distinct* results. Turn sampling on so the 0.8
    # temperature actually takes effect.
    "do_sample": True,
    "temperature": 0.8,
}

# Special tokens (pad/eos/unk/...) stripped out during post-processing.
special_tokens = tokenizer.all_special_tokens
|
|
|
52 |
|
53 |
return new_texts
|
54 |
|
55 |
+
def generation_function(texts, num_recipes=1):
    """Generate up to ``num_recipes`` distinct recipes for the given ingredients.

    Args:
        texts: A comma-separated ingredient string, or a list of such strings.
        num_recipes: How many distinct recipes to collect (default 1).

    Returns:
        A single post-processed recipe (list of decoded strings) when
        ``num_recipes == 1`` and one was produced, otherwise the list of
        collected recipes (possibly shorter than ``num_recipes`` if the
        attempt budget runs out).
    """
    _inputs = texts if isinstance(texts, list) else [texts]
    inputs = [prefix + inp for inp in _inputs]
    inputs = tokenizer(
        inputs,
        max_length=256,
        padding="max_length",
        truncation=True,
        return_tensors="pt",
    )

    input_ids = inputs.input_ids
    attention_mask = inputs.attention_mask

    # Normalize the requested ingredient list once, outside the retry loop.
    # Use _inputs[0] (the normalized list) rather than texts[0]: when `texts`
    # is a plain string, texts[0] is its *first character*, not the string.
    # Whitespace is stripped so " salt" still matches "salt" in the output.
    ingredients = [ing.strip() for ing in _inputs[0].split(",") if ing.strip()]

    generated_recipes = []
    # Cap total attempts: if decoding is deterministic (e.g. sampling turned
    # off), repeated calls yield identical output and an uncapped
    # `while len(...) < num_recipes` loop would never terminate.
    max_attempts = max(10, num_recipes * 10)

    for _ in range(max_attempts):
        if len(generated_recipes) >= num_recipes:
            break
        output_ids = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            **generation_kwargs,
        )
        generated = output_ids.detach().cpu().numpy()
        generated_recipe = target_postprocessing(
            tokenizer.batch_decode(generated, skip_special_tokens=False),
            special_tokens,
        )

        # Validate ingredient coverage unconditionally. In the original code
        # this check lived inside `for recipe in generated_recipes:`, so the
        # very first recipe was accepted without ever being validated (the
        # loop body never runs while the list is empty).
        if not all(ing in generated_recipe[0] for ing in ingredients):
            continue

        # Reject exact duplicates of recipes already collected.
        if any(generated_recipe == recipe for recipe in generated_recipes):
            continue

        generated_recipes.append(generated_recipe)

    # Guard the single-recipe fast path: if no recipe passed validation within
    # the attempt budget, return the (empty) list instead of raising IndexError.
    if num_recipes == 1 and generated_recipes:
        return generated_recipes[0]
    return generated_recipes
|
93 |
+
|
94 |
|
95 |
|
96 |
items = [
|