jonathanjordan21 committed on
Commit
08fd334
1 Parent(s): 89695f3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -30
app.py CHANGED
@@ -35,7 +35,12 @@ numOfKeywords = 20
35
  kw_extractor = yake.KeywordExtractor(lan=language, n=max_ngram_size, dedupLim=deduplication_threshold, dedupFunc=deduplication_algo, windowsSize=windowSize, top=numOfKeywords, features=None)
36
 
37
 
38
-
 
 
 
 
 
39
 
40
 
41
 
@@ -45,35 +50,39 @@ def greet_json():
45
 
46
 
47
  @app.post("/key_phrase_extraction")
48
- async def key_phrase_extraction(inp:InputText):
49
- def merge_keyphrases(keyphrases):
50
- new_merged = keyphrases
51
- while True:
52
- merged = [new_merged[0]]
53
- for i in range(1, len(keyphrases)):
54
- keys = keyphrases[i]
55
- keys_prev = keyphrases[i-1]
56
- label = keys["label"]
57
- score = keys["score"]
58
- vectorizer = CountVectorizer(ngram_range=( 1,len(label.split(" ")) ), lowercase=False)
59
- analyzer = vectorizer.build_analyzer()
60
- for key in analyzer(label)[::-1]:
61
- key_prev = keys_prev["label"][::-1]
62
- if key == key_prev[:len(key)][::-1].strip():
63
- label = key_prev[len(key):][::-1].strip() + " " + label#.replace(key, "")
64
- score = max(keys_prev["score"],keys["score"])
65
- merged.pop()
66
- break
67
- merged.append({"label":label.strip(), "score":score})
68
- if new_merged == merged:
69
- break
70
- else:
71
- new_merged = merged
72
- return merged
73
-
74
- keywords = kw_extractor.extract_keywords(inp.text)
75
-
76
- return merge_keyphrases([{"label":key[0], "score":1-key[1]} for key in keywords if 1-key[1]>inp.threshold])
 
 
 
 
77
 
78
 
79
  @app.post("/language_detection")
 
35
  kw_extractor = yake.KeywordExtractor(lan=language, n=max_ngram_size, dedupLim=deduplication_threshold, dedupFunc=deduplication_algo, windowsSize=windowSize, top=numOfKeywords, features=None)
36
 
37
 
38
+ ner_model = "syafiqfaray/indobert-model-ner"
39
+ ner = pipeline(
40
+ "ner",
41
+ ner_model,
42
+ aggregation_strategy="simple",
43
+ )
44
 
45
 
46
 
 
50
 
51
 
52
  @app.post("/key_phrase_extraction")
53
+ async def key_phrase_extraction(inp: InputText):
54
+ return [{"label": x["word"], "score": x["score"]} for x in ner(inp.text) if x["score"] > inp.threshold]
55
+
56
+ # @app.post("/key_phrase_extraction")
57
+ # async def key_phrase_extraction(inp:InputText):
58
+ # def merge_keyphrases(keyphrases):
59
+ # new_merged = keyphrases
60
+ # while True:
61
+ # merged = [new_merged[0]]
62
+ # for i in range(1, len(keyphrases)):
63
+ # keys = keyphrases[i]
64
+ # keys_prev = keyphrases[i-1]
65
+ # label = keys["label"]
66
+ # score = keys["score"]
67
+ # vectorizer = CountVectorizer(ngram_range=( 1,len(label.split(" ")) ), lowercase=False)
68
+ # analyzer = vectorizer.build_analyzer()
69
+ # for key in analyzer(label)[::-1]:
70
+ # key_prev = keys_prev["label"][::-1]
71
+ # if key == key_prev[:len(key)][::-1].strip():
72
+ # label = key_prev[len(key):][::-1].strip() + " " + label#.replace(key, "")
73
+ # score = max(keys_prev["score"],keys["score"])
74
+ # merged.pop()
75
+ # break
76
+ # merged.append({"label":label.strip(), "score":score})
77
+ # if new_merged == merged:
78
+ # break
79
+ # else:
80
+ # new_merged = merged
81
+ # return merged
82
+
83
+ # keywords = kw_extractor.extract_keywords(inp.text)
84
+
85
+ # return merge_keyphrases([{"label":key[0], "score":1-key[1]} for key in keywords if 1-key[1]>inp.threshold])
86
 
87
 
88
  @app.post("/language_detection")