davda54 committed on
Commit
f8494a3
·
1 Parent(s): fe94d2e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -2
app.py CHANGED
@@ -73,6 +73,18 @@ class BatchStreamer(TextIteratorStreamer):
73
  self.on_finalized_text(printable_text, stream_end=True)
74
 
75
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  def translate(source, source_language, target_language):
77
  if source_language == target_language:
78
  yield source.strip()
@@ -97,10 +109,17 @@ def translate(source, source_language, target_language):
97
  input_ids=source_subwords,
98
  attention_mask=(source_subwords != pad_index).long(),
99
  max_new_tokens = 512-1,
 
 
 
 
 
 
 
100
  # num_beams=4,
101
  # early_stopping=True,
102
- do_sample=False,
103
- use_cache=True
104
  )
105
  t = Thread(target=generate, args=(model,), kwargs=generate_kwargs)
106
  t.start()
 
73
  self.on_finalized_text(printable_text, stream_end=True)
74
 
75
 
76
class RepetitionPenaltyLogitsProcessor(LogitsProcessor):
    """Additively penalize the logits of tokens already present in ``input_ids``.

    The penalty applied to each previously generated token is scaled by the
    model's output-layer bias, interpreted as a token log-prior: tokens whose
    bias is close to the maximum (common tokens) receive almost no penalty,
    while rare tokens are penalized more strongly. The penalty tensor is
    non-positive, so repeated tokens can only lose score.
    """

    def __init__(self, penalty: float, model):
        """Precompute the per-token penalty vector.

        Args:
            penalty: Non-negative scale factor for the repetition penalty
                (0.0 disables it).
            model: The generation model. NOTE(review): assumes
                ``model.classifier.nonlinearity[-1]`` is the final output
                Linear layer whose bias acts as token log-priors — confirm
                against the model definition.
        """
        last_bias = model.classifier.nonlinearity[-1].bias.data
        # Explicit dim: calling log_softmax without `dim` is deprecated in
        # PyTorch (implicit dim inference); for a 1-D bias the result is the
        # same, but the warning is avoided.
        last_bias = torch.nn.functional.log_softmax(last_bias, dim=-1)
        # Shift so the most likely token gets zero penalty; all entries <= 0.
        self.penalty = penalty * (last_bias - last_bias.max())

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:
        """Apply the penalty in place to `scores` at previously seen token ids.

        Args:
            input_ids: (batch, seq_len) previously generated token ids.
            scores: (batch, vocab) next-token logits; modified in place.

        Returns:
            The (mutated) `scores` tensor.
        """
        # Add the per-token penalty to all logits, gather only the positions
        # of already-generated tokens, and scatter those penalized values
        # back — untouched tokens keep their original scores.
        penalized_score = torch.gather(scores + self.penalty.unsqueeze(0).to(input_ids.device), 1, input_ids)
        scores.scatter_(1, input_ids, penalized_score)
        return scores
86
+
87
+
88
  def translate(source, source_language, target_language):
89
  if source_language == target_language:
90
  yield source.strip()
 
109
  input_ids=source_subwords,
110
  attention_mask=(source_subwords != pad_index).long(),
111
  max_new_tokens = 512-1,
112
+ top_k=64,
113
+ top_p=0.95,
114
+ do_sample=True,
115
+ temperature=0.3,
116
+ num_beams=1,
117
+ use_cache=True,
118
+ logits_processor=[RepetitionPenaltyLogitsProcessor(1.0, model)]
119
  # num_beams=4,
120
  # early_stopping=True,
121
+ #do_sample=False,
122
+ #use_cache=True
123
  )
124
  t = Thread(target=generate, args=(model,), kwargs=generate_kwargs)
125
  t.start()