jonathanjordan21 committed
Commit 08fd334
Parent(s): 89695f3

Update app.py

app.py CHANGED
@@ -35,7 +35,12 @@ numOfKeywords = 20
 kw_extractor = yake.KeywordExtractor(lan=language, n=max_ngram_size, dedupLim=deduplication_threshold, dedupFunc=deduplication_algo, windowsSize=windowSize, top=numOfKeywords, features=None)
 
 
-
+ner_model = "syafiqfaray/indobert-model-ner"
+ner = pipeline(
+    "ner",
+    ner_model,
+    aggregation_strategy="simple",
+)
 
 
 
@@ -45,35 +50,39 @@ def greet_json():
 
 
 @app.post("/key_phrase_extraction")
-async def key_phrase_extraction(inp:InputText):
-    def merge_keyphrases(keyphrases):
-        new_merged = keyphrases
-        while True:
-            merged = [new_merged[0]]
-            for i in range(1, len(keyphrases)):
-                keys = keyphrases[i]
-                keys_prev = keyphrases[i-1]
-                label = keys["label"]
-                score = keys["score"]
-                vectorizer = CountVectorizer(ngram_range=( 1,len(label.split(" ")) ), lowercase=False)
-                analyzer = vectorizer.build_analyzer()
-                for key in analyzer(label)[::-1]:
-                    key_prev = keys_prev["label"][::-1]
-                    if key == key_prev[:len(key)][::-1].strip():
-                        label = key_prev[len(key):][::-1].strip() + " " + label#.replace(key, "")
-                        score = max(keys_prev["score"],keys["score"])
-                        merged.pop()
-                        break
-                merged.append({"label":label.strip(), "score":score})
-            if new_merged == merged:
-                break
-            else:
-                new_merged = merged
-        return merged
-
-    keywords = kw_extractor.extract_keywords(inp.text)
-
-    return merge_keyphrases([{"label":key[0], "score":1-key[1]} for key in keywords if 1-key[1]>inp.threshold])
+async def key_phrase_extraction(inp: InputText):
+    return [{"label": x["word"], "score": x["score"]} for x in ner(inp.text) if x["score"] > inp.threshold]
+
+# @app.post("/key_phrase_extraction")
+# async def key_phrase_extraction(inp:InputText):
+#     def merge_keyphrases(keyphrases):
+#         new_merged = keyphrases
+#         while True:
+#             merged = [new_merged[0]]
+#             for i in range(1, len(keyphrases)):
+#                 keys = keyphrases[i]
+#                 keys_prev = keyphrases[i-1]
+#                 label = keys["label"]
+#                 score = keys["score"]
+#                 vectorizer = CountVectorizer(ngram_range=( 1,len(label.split(" ")) ), lowercase=False)
+#                 analyzer = vectorizer.build_analyzer()
+#                 for key in analyzer(label)[::-1]:
+#                     key_prev = keys_prev["label"][::-1]
+#                     if key == key_prev[:len(key)][::-1].strip():
+#                         label = key_prev[len(key):][::-1].strip() + " " + label#.replace(key, "")
+#                         score = max(keys_prev["score"],keys["score"])
+#                         merged.pop()
+#                         break
+#                 merged.append({"label":label.strip(), "score":score})
+#             if new_merged == merged:
+#                 break
+#             else:
+#                 new_merged = merged
+#         return merged
+
+#     keywords = kw_extractor.extract_keywords(inp.text)
+
+#     return merge_keyphrases([{"label":key[0], "score":1-key[1]} for key in keywords if 1-key[1]>inp.threshold])
 
 
 @app.post("/language_detection")
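
For anyone reproducing the change locally: the commit swaps the YAKE keyword extractor for a transformers NER pipeline while keeping the endpoint path. The sketch below is not part of the commit; it assumes app.py exposes the FastAPI `app` object and that `InputText` declares `text` and `threshold` fields, as the handler's use of `inp.text` and `inp.threshold` suggests.

# Hypothetical smoke test for the updated /key_phrase_extraction endpoint
# (illustration only, not part of this commit).
from fastapi.testclient import TestClient

from app import app  # assumes app.py is importable and defines `app`

client = TestClient(app)

response = client.post(
    "/key_phrase_extraction",
    json={
        "text": "Presiden Joko Widodo meninjau proyek kereta cepat di Bandung.",
        "threshold": 0.5,
    },
)
print(response.status_code)
print(response.json())  # e.g. [{"label": "Joko Widodo", "score": 0.99}, ...]

The response mirrors the handler's list comprehension: the pipeline's `word` field becomes `label`, and only entities whose score exceeds the requested `threshold` are returned.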