Unityraptor committed on
Commit
889dd23
1 Parent(s): 1186b46

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -30
app.py CHANGED
@@ -1,33 +1,39 @@
1
- pip install git+https://github.com/PrithivirajDamodaran/Parrot_Paraphraser.git
2
 
3
  import gradio as gr
4
- from parrot import Parrot
5
- import torch
6
- import warnings
7
- warnings.filterwarnings("ignore")
8
-
9
- '''
10
- uncomment to get reproducable paraphrase generations
11
- def random_state(seed):
12
- torch.manual_seed(seed)
13
- if torch.cuda.is_available():
14
- torch.cuda.manual_seed_all(seed)
15
-
16
- random_state(1234)
17
- '''
18
-
19
- #Init models (make sure you init ONLY once if you integrate this to your code)
20
- parrot = Parrot(model_tag="prithivida/parrot_paraphraser_on_T5", use_gpu=False)
21
-
22
- phrases = ["Can you recommed some upscale restaurants in Newyork?",
23
- "What are the famous places we should not miss in Russia?"
24
- ]
25
-
26
- for phrase in phrases:
27
- print("-"*100)
28
- print("Input_phrase: ", phrase)
29
- print("-"*100)
30
- para_phrases = parrot.augment(input_phrase=phrase)
31
- for para_phrase in para_phrases:
32
- print(para_phrase)
 
 
 
 
 
 
 
33
 
 
 
1
 
2
  import gradio as gr
3
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
4
+
5
+ device = "cuda"
6
+
7
+ tokenizer = AutoTokenizer.from_pretrained("humarin/chatgpt_paraphraser_on_T5_base")
8
+
9
+ model = AutoModelForSeq2SeqLM.from_pretrained("humarin/chatgpt_paraphraser_on_T5_base").to(device)
10
+
11
+ def paraphrase(
12
+ question,
13
+ num_beams=5,
14
+ num_beam_groups=5,
15
+ num_return_sequences=5,
16
+ repetition_penalty=10.0,
17
+ diversity_penalty=3.0,
18
+ no_repeat_ngram_size=2,
19
+ temperature=0.7,
20
+ max_length=128
21
+ ):
22
+ input_ids = tokenizer(
23
+ f'paraphrase: {question}',
24
+ return_tensors="pt", padding="longest",
25
+ max_length=max_length,
26
+ truncation=True,
27
+ ).input_ids.to(device)
28
+
29
+ outputs = model.generate(
30
+ input_ids, temperature=temperature, repetition_penalty=repetition_penalty,
31
+ num_return_sequences=num_return_sequences, no_repeat_ngram_size=no_repeat_ngram_size,
32
+ num_beams=num_beams, num_beam_groups=num_beam_groups,
33
+ max_length=max_length, diversity_penalty=diversity_penalty
34
+ )
35
+
36
+ res = tokenizer.batch_decode(outputs, skip_special_tokens=True)
37
+
38
+ return res
39