Peter committed on
Commit
c0a9b19
·
1 Parent(s): b2df366

:truck: move functions

Browse files
Files changed (2) hide show
  1. app.py +2 -39
  2. utils.py +49 -0
app.py CHANGED
@@ -7,6 +7,7 @@ import nltk
7
  from cleantext import clean
8
 
9
  from summarize import load_model_and_tokenizer, summarize_via_tokenbatches
 
10
 
11
  _here = Path(__file__).parent
12
 
@@ -18,27 +19,6 @@ transformers.logging.set_verbosity_error()
18
  logging.basicConfig()
19
 
20
 
21
- def truncate_word_count(text, max_words=512):
22
- """
23
- truncate_word_count - a helper function for the gradio module
24
- Parameters
25
- ----------
26
- text : str, required, the text to be processed
27
- max_words : int, optional, the maximum number of words, default=512
28
- Returns
29
- -------
30
- dict, the text and whether it was truncated
31
- """
32
- # split on whitespace with regex
33
- words = re.split(r"\s+", text)
34
- processed = {}
35
- if len(words) > max_words:
36
- processed["was_truncated"] = True
37
- processed["truncated_text"] = " ".join(words[:max_words])
38
- else:
39
- processed["was_truncated"] = False
40
- processed["truncated_text"] = text
41
- return processed
42
 
43
 
44
  def proc_submission(
@@ -117,23 +97,6 @@ def proc_submission(
117
  return html
118
 
119
 
120
- def load_examples(examples_dir="examples"):
121
- """
122
- load_examples - a helper function for the gradio module to load examples
123
- Returns:
124
- list of str, the examples
125
- """
126
- src = _here / examples_dir
127
- src.mkdir(exist_ok=True)
128
- examples = [f for f in src.glob("*.txt")]
129
- # load the examples into a list
130
- text_examples = []
131
- for example in examples:
132
- with open(example, "r") as f:
133
- text = f.read()
134
- text_examples.append([text, "large", 2, 512, 0.7, 3.5, 3])
135
-
136
- return text_examples
137
 
138
 
139
  if __name__ == "__main__":
@@ -183,6 +146,6 @@ if __name__ == "__main__":
183
  title=title,
184
  description=description,
185
  article="The model can be used with tag [pszemraj/led-large-book-summary](https://huggingface.co/pszemraj/led-large-book-summary). See the model card for details on usage & a notebook for a tutorial.",
186
- examples=load_examples(),
187
  cache_examples=True,
188
  ).launch()
 
7
  from cleantext import clean
8
 
9
  from summarize import load_model_and_tokenizer, summarize_via_tokenbatches
10
+ from utils import load_examples, truncate_word_count
11
 
12
  _here = Path(__file__).parent
13
 
 
19
  logging.basicConfig()
20
 
21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
 
24
  def proc_submission(
 
97
  return html
98
 
99
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
 
101
 
102
  if __name__ == "__main__":
 
146
  title=title,
147
  description=description,
148
  article="The model can be used with tag [pszemraj/led-large-book-summary](https://huggingface.co/pszemraj/led-large-book-summary). See the model card for details on usage & a notebook for a tutorial.",
149
+ examples=load_examples(_here / "examples"),
150
  cache_examples=True,
151
  ).launch()
utils.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ utils.py - Utility functions for the project.
3
+ """
4
+
5
+
6
+ from pathlib import Path
7
+ import re
8
+
9
+
10
def truncate_word_count(text, max_words=512):
    """
    truncate_word_count - a helper function for the gradio module

    Limits ``text`` to at most ``max_words`` whitespace-separated words.

    Parameters
    ----------
    text : str, required, the text to be processed
    max_words : int, optional, the maximum number of words, default=512

    Returns
    -------
    dict with two keys:
        "was_truncated" : bool, True when ``text`` held more than ``max_words`` words
        "truncated_text" : str, the first ``max_words`` words joined by single
            spaces when truncated, otherwise ``text`` exactly as it was passed in
    """
    # str.split() with no arguments splits on runs of whitespace and drops the
    # empty strings that leading/trailing whitespace would otherwise produce,
    # so padding around the text is never counted as a word.
    words = text.split()
    if len(words) > max_words:
        return {
            "was_truncated": True,
            "truncated_text": " ".join(words[:max_words]),
        }
    # Within the limit: hand the input back untouched (original spacing kept).
    return {"was_truncated": False, "truncated_text": text}
31
+
32
+
33
def load_examples(src):
    """
    load_examples - a helper function for the gradio module to load examples

    Parameters
    ----------
    src : str or Path, required, directory holding the ``*.txt`` example files;
        it is created (including missing parents) when it does not exist

    Returns
    -------
    list of list, one row per example file in file-name order; each row is
        the file's text followed by the fixed values "large", 2, 512, 0.7, 3.5, 3
    """
    src = Path(src)
    # parents=True so a nested, not-yet-existing examples directory does not raise
    src.mkdir(parents=True, exist_ok=True)
    # glob() yields files in an OS-dependent order; sort so the example list
    # is the same on every run and platform
    example_files = sorted(src.glob("*.txt"))
    # read as UTF-8 explicitly instead of relying on the platform's locale encoding
    return [
        [path.read_text(encoding="utf-8"), "large", 2, 512, 0.7, 3.5, 3]
        for path in example_files
    ]