Jingxiang Mo committed on
Commit
9522bb7
1 Parent(s): a381bc0
Files changed (1) hide show
  1. app.py +38 -11
app.py CHANGED
@@ -1,8 +1,6 @@
1
  import os
2
  import gradio as gr
3
  import wikipediaapi as wk
4
-
5
-
6
  from transformers import (
7
  TokenClassificationPipeline,
8
  AutoModelForTokenClassification,
@@ -11,6 +9,7 @@ from transformers import (
11
  from transformers.pipelines import AggregationStrategy
12
  import numpy as np
13
 
 
14
  class KeyphraseExtractionPipeline(TokenClassificationPipeline):
15
  def __init__(self, model, *args, **kwargs):
16
  super().__init__(
@@ -26,13 +25,35 @@ class KeyphraseExtractionPipeline(TokenClassificationPipeline):
26
  aggregation_strategy=AggregationStrategy.SIMPLE,
27
  )
28
  return np.unique([result.get("word").strip() for result in results])
29
-
30
- # Load pipeline
31
  model_name = "ml6team/keyphrase-extraction-kbir-inspec"
32
  extractor = KeyphraseExtractionPipeline(model=model_name)
33
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
 
35
- def keyphrases_out(input): #Not used but might be useful
 
36
  input = input.replace("\n", " ")
37
  keyphrases = extractor(input)
38
  out = "The Key Phrases in your text are:\n\n"
@@ -44,16 +65,22 @@ def wikipedia_search(input):
44
  input = input.replace("\n", " ")
45
  keyphrases = extractor(input)
46
  wiki = wk.Wikipedia('en')
47
- for k in keyphrases:
48
- page = wiki.page(k)
49
- if page.exists():
50
- break
51
  return page.summary
52
 
53
 
54
- demo = gr.Interface(fn=wikipedia_search, inputs = "text", outputs = "text")
55
 
56
- demo.launch()
 
 
 
 
 
 
 
 
 
57
 
58
 
59
 
 
1
  import os
2
  import gradio as gr
3
  import wikipediaapi as wk
 
 
4
  from transformers import (
5
  TokenClassificationPipeline,
6
  AutoModelForTokenClassification,
 
9
  from transformers.pipelines import AggregationStrategy
10
  import numpy as np
11
 
12
+ # =====[ DEFINE PIPELINE ]===== #
13
  class KeyphraseExtractionPipeline(TokenClassificationPipeline):
14
  def __init__(self, model, *args, **kwargs):
15
  super().__init__(
 
25
  aggregation_strategy=AggregationStrategy.SIMPLE,
26
  )
27
  return np.unique([result.get("word").strip() for result in results])
28
+
29
+ # =====[ LOAD PIPELINE ]===== #
30
  model_name = "ml6team/keyphrase-extraction-kbir-inspec"
31
  extractor = KeyphraseExtractionPipeline(model=model_name)
32
 
33
+ text = """
34
+ Keyphrase extraction is a technique in text analysis where you extract the
35
+ important keyphrases from a document. Thanks to these keyphrases humans can
36
+ understand the content of a text very quickly and easily without reading it
37
+ completely. Keyphrase extraction was first done primarily by human annotators,
38
+ who read the text in detail and then wrote down the most important keyphrases.
39
+ The disadvantage is that if you work with a lot of documents, this process
40
+ can take a lot of time.
41
+
42
+ Here is where Artificial Intelligence comes in. Currently, classical machine
43
+ learning methods, that use statistical and linguistic features, are widely used
44
+ for the extraction process. Now with deep learning, it is possible to capture
45
+ the semantic meaning of a text even better than these classical methods.
46
+ Classical methods look at the frequency, occurrence and order of words
47
+ in the text, whereas these neural approaches can capture long-term
48
+ semantic dependencies and context of words in a text.
49
+ """.replace("\n", " ")
50
+
51
+ keyphrases = extractor(text)
52
+
53
+ print(keyphrases)
54
 
55
+
56
+ def keyphrases_out(input):
57
  input = input.replace("\n", " ")
58
  keyphrases = extractor(input)
59
  out = "The Key Phrases in your text are:\n\n"
 
65
  input = input.replace("\n", " ")
66
  keyphrases = extractor(input)
67
  wiki = wk.Wikipedia('en')
68
+
69
+ page = wiki.page("")
 
 
70
  return page.summary
71
 
72
 
 
73
 
74
+
75
+ # for k in keyphrases:
76
+ # page = wiki.page(k)
77
+ # if page.exists():
78
+ # break
79
+ # return page.summary
80
+
81
+ # =====[ DEFINE INTERFACE ]===== #
82
+ # demo = gr.Interface(fn=wikipedia_search, inputs = "text", outputs = "text")
83
+ # demo.launch(share=True)
84
 
85
 
86