jonathanjordan21 committed on
Commit
2a52076
1 Parent(s): 46b8bbd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -0
app.py CHANGED
@@ -3,6 +3,9 @@ from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipe
3
  import torch
4
  from pydantic import BaseModel
5
  from typing import Optional
 
 
 
6
 
7
  app = FastAPI()
8
 
@@ -22,6 +25,15 @@ language_model = AutoModelForSequenceClassification.from_pretrained(model_name)
22
  language_tokenizer = AutoTokenizer.from_pretrained(model_name)
23
 
24
 
 
 
 
 
 
 
 
 
 
25
 
26
 
27
 
@@ -32,6 +44,37 @@ def greet_json():
32
  return {"Hello": "World!"}
33
 
34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
  @app.post("/language_detection")
37
  async def language_detection(inp: InputText):
 
3
  import torch
4
  from pydantic import BaseModel
5
  from typing import Optional
6
+ from sklearn.feature_extraction.text import CountVectorizer
7
+ import yake
8
+
9
 
10
  app = FastAPI()
11
 
 
25
  language_tokenizer = AutoTokenizer.from_pretrained(model_name)
26
 
27
 
28
# Settings handed to the YAKE keyword extractor below.
language = "id"                    # passed as `lan`
max_ngram_size = 3                 # passed as `n`
deduplication_threshold = 0.6      # passed as `dedupLim`
deduplication_algo = 'seqm'        # passed as `dedupFunc`
windowSize = 3                     # passed as `windowsSize`
numOfKeywords = 20                 # passed as `top`

# Single module-level extractor shared by the key-phrase endpoint.
kw_extractor = yake.KeywordExtractor(
    lan=language,
    n=max_ngram_size,
    dedupLim=deduplication_threshold,
    dedupFunc=deduplication_algo,
    windowsSize=windowSize,
    top=numOfKeywords,
    features=None,
)
36
+
37
 
38
 
39
 
 
44
  return {"Hello": "World!"}
45
 
46
 
47
@app.post("/key_phrase_extraction")
async def key_phrase_extraction(inp: InputText):
    """Extract key phrases from ``inp.text`` and merge overlapping neighbours.

    Keywords come from the module-level ``kw_extractor``. Each extractor
    score ``s`` is reported as ``1 - s`` and only phrases whose reported
    score is strictly greater than ``inp.threshold`` are kept. Consecutive
    phrases are then merged when the earlier one ends with a word n-gram of
    the later one (e.g. ``"machine learning"`` + ``"learning model"`` ->
    ``"machine learning model"``); a merged phrase keeps the higher score.

    Args:
        inp: Request body; ``text`` and ``threshold`` are read from it.

    Returns:
        A list of ``{"label": str, "score": float}`` dicts. The list is
        empty when no keyword passes the threshold.
    """

    def ngrams_longest_first(label):
        # Word n-grams of `label`, from single tokens up to the whole phrase,
        # reversed so the longest candidates are tried first.
        analyzer = CountVectorizer(
            ngram_range=(1, len(label.split(" "))), lowercase=False
        ).build_analyzer()
        return analyzer(label)[::-1]

    def merge_once(phrases):
        # One left-to-right pass over a non-empty list of phrases.
        merged = [phrases[0]]
        for current in phrases[1:]:
            label = current["label"]
            score = current["score"]
            # Compare against the phrase that will actually be replaced
            # (the last emitted one), so a chain of overlaps keeps growing
            # instead of discarding what was merged a step earlier.
            previous = merged[-1]
            prev_label = previous["label"]
            for key in ngrams_longest_first(label):
                if prev_label.endswith(key):
                    # Drop the overlapping tail of the previous phrase and
                    # prepend what is left to the current phrase.
                    head = prev_label[: len(prev_label) - len(key)].strip()
                    label = head + " " + label
                    score = max(previous["score"], score)
                    merged.pop()
                    break
            merged.append({"label": label.strip(), "score": score})
        return merged

    def merge_keyphrases(keyphrases):
        # Nothing passed the threshold: there is nothing to merge.
        if not keyphrases:
            return []
        current = keyphrases
        # Repeat until a pass changes nothing. Every pass that changes the
        # list either merges (shrinking it) or only strips labels, so this
        # terminates.
        while True:
            merged = merge_once(current)
            if merged == current:
                return merged
            current = merged

    keywords = kw_extractor.extract_keywords(inp.text)

    candidates = [
        {"label": key[0], "score": 1 - key[1]}
        for key in keywords
        if 1 - key[1] > inp.threshold
    ]
    return merge_keyphrases(candidates)
77
+
78
 
79
  @app.post("/language_detection")
80
  async def language_detection(inp: InputText):