wldmr committed on
Commit
072aee3
·
1 Parent(s): c5c1da9
Files changed (2) hide show
  1. app.py +2 -0
  2. metrics.py +10 -1
app.py CHANGED
@@ -83,6 +83,8 @@ def predict(brakes, transcript):
83
 
84
  if __name__ == "__main__":
85
 
 
 
86
  title = "Deep Punkt App"
87
  description = """
88
  <b>Description</b>: <br>
 
83
 
84
  if __name__ == "__main__":
85
 
86
+ metrics.load_nltk()
87
+
88
  title = "Deep Punkt App"
89
  description = """
90
  <b>Description</b>: <br>
metrics.py CHANGED
@@ -1,8 +1,17 @@
1
  # Import nltk library for natural language processing
2
  import nltk
3
-
4
  from transformers import AutoTokenizer
5
 
 
 
 
 
 
 
 
 
 
6
  # Define a function that takes some text as input and returns the number of tokens
7
  def token_count(text):
8
  # Import the Encoder class from bpe
 
1
  # Import nltk library for natural language processing
2
  import nltk
3
+ import os
4
  from transformers import AutoTokenizer
5
 
6
+ def load_nltk():
7
+ nltk_file = '/home/user/nltk_data/tokenizers/punkt.zip'
8
+ if os.path.exists(nltk_file):
9
+ print('nltk punkt file exists in ', nltk_file)
10
+ else:
11
+ print("downloading punkt file")
12
+ nltk.download('punkt')
13
+
14
+
15
  # Define a function that takes some text as input and returns the number of tokens
16
  def token_count(text):
17
  # Import the Encoder class from bpe