Peter committed on
Commit
8d9ed7d
1 Parent(s): 7afd604

🔊 ⚡️ add logs, improve params

Browse files

Signed-off-by: Peter <74869040+pszemraj@users.noreply.github.com>

Files changed (1) hide show
  1. grammar_improve.py +6 -4
grammar_improve.py CHANGED
@@ -3,6 +3,8 @@ grammar_improve.py - this .py script contains functions to improve the grammar o
3
 
4
  """
5
 
 
 
6
  import math
7
  import pprint as pp
8
  import re
@@ -453,8 +455,8 @@ def correct_grammar(
453
  n_results: int = 1,
454
  beams: int = 8,
455
  temp=1,
456
- uniq_ngrams=2,
457
- rep_penalty=1.5,
458
  device="cpu",
459
  ):
460
  """
@@ -480,7 +482,7 @@ def correct_grammar(
480
  st = time.perf_counter()
481
 
482
  if len(tokenizer(input_text).input_ids) < 4:
483
- print(f"input text of {input_text} is too short to be corrected")
484
  return input_text
485
  max_length = min(int(math.ceil(len(input_text) * 1.2)), 128)
486
  batch = tokenizer(
@@ -494,7 +496,7 @@ def correct_grammar(
494
  **batch,
495
  max_length=max_length,
496
  min_length=min(10, len(input_text)),
497
- no_repeat_ngram_size=uniq_ngrams,
498
  repetition_penalty=rep_penalty,
499
  num_beams=beams,
500
  num_return_sequences=n_results,
 
3
 
4
  """
5
 
6
+ import logging
7
+ logging.basicConfig(level=logging.INFO)
8
  import math
9
  import pprint as pp
10
  import re
 
455
  n_results: int = 1,
456
  beams: int = 8,
457
  temp=1,
458
+ no_repeat_ngram_size=4,
459
+ rep_penalty=2.5,
460
  device="cpu",
461
  ):
462
  """
 
482
  st = time.perf_counter()
483
 
484
  if len(tokenizer(input_text).input_ids) < 4:
485
+ logging.info(f"input text of {input_text} is too short to be corrected")
486
  return input_text
487
  max_length = min(int(math.ceil(len(input_text) * 1.2)), 128)
488
  batch = tokenizer(
 
496
  **batch,
497
  max_length=max_length,
498
  min_length=min(10, len(input_text)),
499
+ no_repeat_ngram_size=no_repeat_ngram_size,
500
  repetition_penalty=rep_penalty,
501
  num_beams=beams,
502
  num_return_sequences=n_results,