File size: 2,962 Bytes
ea8b4a5
b01c113
 
ea8b4a5
6a128a7
d812ab5
 
 
b01c113
ea8b4a5
 
6a128a7
ea8b4a5
b01c113
 
 
 
 
 
6a128a7
 
 
 
 
 
 
 
 
 
d812ab5
 
 
b01c113
 
6a128a7
 
28c31d6
2cb17e6
6a128a7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28c31d6
6a128a7
 
79fd2ad
ea8b4a5
 
b01c113
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cc6b847
b01c113
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
from fastapi import FastAPI, Request
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
import torch
from pydantic import BaseModel
from typing import Optional

# Application object; the @app.get / @app.post decorators further down
# register their handlers on it.
app = FastAPI()


# Request body accepted by the POST endpoints of this service.
# (Documented with comments rather than a docstring so the schema pydantic
# generates for this model stays exactly as it was.)
class InputText(BaseModel):
    # Text to run through the classifier.
    text: str
    # Minimum score read by /language_detection; None when the client omits it.
    threshold: Optional[float] = None


# Sentiment checkpoint: tokenizer and weights are loaded once, at import time.
model_name = "cardiffnlp/twitter-xlm-roberta-base-sentiment"
sentiment_tokenizer = AutoTokenizer.from_pretrained(model_name)
sentiment_model = AutoModelForSequenceClassification.from_pretrained(model_name)
# Give index 3 a name: /sentiment_score appends one extra logit to the model
# output and looks its label up through this mapping.
sentiment_model.config.id2label[3] = "mixed"

# Language-identification checkpoint; `model_name` is rebound on purpose.
model_name = 'qanastek/51-languages-classifier'
language_tokenizer = AutoTokenizer.from_pretrained(model_name)
language_model = AutoModelForSequenceClassification.from_pretrained(model_name)







# Root route: replies to GET "/" with a fixed greeting payload.
# (A comment is used instead of a docstring so the generated API description
# of this route is unchanged.)
@app.get("/")
def greet_json():
    greeting = {"Hello": "World!"}
    return greeting



@app.post("/language_detection")
async def language_detection(inp: InputText):
    """Rank the language labels predicted for ``inp.text``.

    Every logit is passed through a sigmoid independently, so the returned
    scores are per-label values in [0, 1] and do not sum to 1.

    Args:
        inp: Request body. ``inp.threshold`` is the minimum score a label
            needs in order to be returned; when it is omitted, no label is
            filtered out.

    Returns:
        A list of ``{"label": str, "score": float}`` dicts ordered from the
        highest score to the lowest. The list is empty when no label reaches
        the threshold.
    """
    # Cap the token count: without truncation a long text can exceed the
    # model's position-embedding table and fail inside the forward pass.
    # NOTE(review): 512 assumes an XLM-RoBERTa-base checkpoint - confirm.
    inputs = language_tokenizer(
        inp.text, return_tensors="pt", truncation=True, max_length=512
    )
    with torch.no_grad():
        logits = language_model(**inputs).logits

    # A request carries a single text, so row 0 holds all class scores.
    scores = torch.sigmoid(logits)[0]

    # `threshold` is optional in the request model; comparing a tensor with
    # None raises TypeError, so an omitted threshold means "keep everything".
    threshold = 0.0 if inp.threshold is None else inp.threshold

    # Sort all classes by score, then drop those below the threshold.
    ranked_ids = torch.argsort(scores, descending=True)
    kept_ids = ranked_ids[scores[ranked_ids] >= threshold]

    id2label = language_model.config.id2label
    return [
        {"label": id2label[class_id], "score": scores[class_id].item()}
        for class_id in kept_ids.tolist()
    ]


@app.post("/sentiment_score")
async def sentiment_score(inp: InputText):
    """Score ``inp.text`` for each sentiment label plus a derived "mixed" label.

    The model's logits are shifted, a fourth heuristic logit is appended, and
    a softmax over all four values produces the scores. The fourth logit gets
    larger the closer the first and last class logits are to each other.
    ``inp.threshold`` is not read by this endpoint.

    Args:
        inp: Request body; only ``inp.text`` is used.

    Returns:
        A list of ``{"label": str, "score": float}`` dicts, one per label,
        ordered from the highest score to the lowest. Scores sum to 1.
    """
    # The character slice only bounds tokenisation work; it does not bound the
    # number of tokens, so token-level truncation is what protects the model.
    # NOTE(review): 512 assumes an XLM-RoBERTa-base checkpoint - confirm.
    inputs = sentiment_tokenizer(
        inp.text[:2500], return_tensors="pt", truncation=True, max_length=512
    )

    with torch.no_grad():
        logits = sentiment_model(**inputs).logits

    # Shift every logit up by |logit[1]|; a uniform shift leaves the relative
    # order of the three model classes untouched but feeds the formula below.
    shifted = logits + logits[0, 1].abs()

    # Presumably columns 0 / 1 / -1 are negative / neutral / positive - verify
    # against the checkpoint's id2label.
    first, middle, last = shifted[0, 0], shifted[0, 1], shifted[0, -1]
    outer_gap = torch.abs(first - last)
    # Largest magnitude among the two outer classes (columns 0 and 2).
    outer_peak = torch.max(torch.abs(shifted[0, ::2]))

    # Heuristic "mixed" logit: starts at 1 and is pushed down in proportion to
    # the gap between the outer classes; the floor-divided ratio scales that
    # penalty when the middle class outweighs both outer ones.
    mixed_logit = 1 - outer_gap * (2 + middle // outer_peak)

    # Append the extra logit as a fourth column, then normalise.
    extended = torch.cat((shifted, mixed_logit.reshape(1, 1)), dim=-1)
    probs = torch.softmax(extended, dim=-1)[0]

    id2label = sentiment_model.config.id2label
    return [
        {"label": id2label[class_id], "score": probs[class_id].item()}
        for class_id in probs.argsort(descending=True).tolist()
    ]