File size: 2,962 Bytes
ea8b4a5 b01c113 ea8b4a5 6a128a7 d812ab5 b01c113 ea8b4a5 6a128a7 ea8b4a5 b01c113 6a128a7 d812ab5 b01c113 6a128a7 28c31d6 2cb17e6 6a128a7 28c31d6 6a128a7 79fd2ad ea8b4a5 b01c113 cc6b847 b01c113 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 |
from fastapi import FastAPI, Request
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
import torch
from pydantic import BaseModel
from typing import Optional
# Application instance; the @app.get / @app.post decorators in this file
# register their handlers on it.
app = FastAPI()
class InputText(BaseModel):
    # Request body accepted by the POST endpoints of this app.

    # Text to run through the classifier.
    text: str
    # Optional score cutoff supplied by the client; None when omitted.
    threshold: Optional[float] = None
# Sentiment classifier and its tokenizer, both loaded from the checkpoint
# named in model_name.
model_name = "cardiffnlp/twitter-xlm-roberta-base-sentiment"
sentiment_model = AutoModelForSequenceClassification.from_pretrained(model_name)
sentiment_tokenizer = AutoTokenizer.from_pretrained(model_name)
# sentiment_score appends one extra logit to the model output and resolves
# labels through id2label, so index 3 is given a name here.
sentiment_model.config.id2label[3] = "mixed"
# model_name is rebound: from here on it names the language-detection
# checkpoint, not the sentiment one.
model_name = 'qanastek/51-languages-classifier'
language_model = AutoModelForSequenceClassification.from_pretrained(model_name)
language_tokenizer = AutoTokenizer.from_pretrained(model_name)
# Root route: answers GET "/" with a fixed greeting payload.
@app.get("/")
def greet_json():
    greeting = {"Hello": "World!"}
    return greeting
@app.post("/language_detection")
async def language_detection(inp: InputText):
    """Score the language of the submitted text.

    Every logit of the language classifier is passed through a sigmoid
    independently, so each score lies in [0, 1] and the scores do not sum
    to one.

    Request body:
        text: the text to classify.
        threshold: optional minimum score a label must reach to be
            returned. When omitted, no filtering is applied and every
            label is returned.

    Returns:
        A list of {"label": ..., "score": ...} objects ordered by
        descending score. The list is empty when no label reaches the
        threshold.
    """
    # Truncate at the tokenizer so very long text cannot overflow the model's
    # position embeddings.
    # NOTE(review): 512 assumes an XLM-RoBERTa-sized position limit for this
    # checkpoint -- confirm against the model config.
    inputs = language_tokenizer(
        inp.text, return_tensors='pt', truncation=True, max_length=512
    )
    with torch.no_grad():
        logits = language_model(**inputs).logits

    # A single text is tokenized, so row 0 holds all of its label scores.
    scores = torch.sigmoid(logits)[0]
    ranked = torch.argsort(scores, descending=True)

    # threshold is Optional and defaults to None; comparing a tensor with
    # None raises TypeError, so only filter when a value was supplied.
    if inp.threshold is not None:
        ranked = ranked[scores[ranked] >= inp.threshold]

    id2label = language_model.config.id2label
    score_values = scores.tolist()
    return [
        {"label": id2label[class_id], "score": score_values[class_id]}
        for class_id in ranked.tolist()
    ]
@app.post("/sentiment_score")
async def sentiment_score(inp: InputText):
    """Score the sentiment of the submitted text, including a "mixed" class.

    The classifier logits are extended with one heuristic logit before the
    softmax, so the returned scores sum to one across the model's own
    labels plus the extra label stored at id2label[3] ("mixed" in this
    file).

    Request body:
        text: the text to score; only its first 2500 characters are used.
        threshold: accepted by the shared request model but not used here.

    Returns:
        A list of {"label": ..., "score": ...} objects ordered by
        descending score.
    """
    # The character cap alone does not bound the token count, so also
    # truncate at the tokenizer to keep the input within the model's limit.
    # NOTE(review): 512 assumes an XLM-RoBERTa-sized position limit for this
    # checkpoint -- confirm against the model config.
    inputs = sentiment_tokenizer(
        inp.text[:2500], return_tensors='pt', truncation=True, max_length=512
    )
    with torch.no_grad():
        logits = sentiment_model(**inputs).logits

    # Shift every logit up by the magnitude of the middle one, which makes
    # the middle logit non-negative.
    # NOTE(review): the indexing below assumes three logits with the
    # outer two being the opposing polarities -- confirm the label order.
    logits = logits + logits[0, 1].abs()

    first, middle, last = logits[0, 0], logits[0, 1], logits[0, -1]
    outer_gap = torch.abs(first - last)
    outer_peak = torch.max(torch.abs(logits[0, ::2]))

    # Heuristic "mixed" logit: close to 1 when the two outer logits are
    # nearly equal, and increasingly negative as they diverge. The gap is
    # weighted more heavily the larger the middle logit is relative to the
    # stronger outer logit (floor division).
    mixed_logit = 1 - outer_gap * (2 + (middle // outer_peak))

    logits = torch.cat((logits, mixed_logit.unsqueeze(0).unsqueeze(0)), dim=-1)
    probabilities = torch.nn.functional.softmax(logits, dim=-1)

    order = probabilities.argsort(dim=-1, descending=True)[0]
    id2label = sentiment_model.config.id2label
    probability_values = probabilities[0].tolist()
    return [
        {"label": id2label[class_id], "score": probability_values[class_id]}
        for class_id in order.tolist()
    ]
|