malmukhtar committed
Commit e9dc5bd · 1 Parent(s): 58d5a80

Initial commit
accessibility_classifier/.DS_Store ADDED
Binary file (6.15 kB).
 
accessibility_classifier/api.py ADDED
@@ -0,0 +1,11 @@
+ from fastapi import Depends, FastAPI
+ from .classifier.model import Model, get_model
+
+ app = FastAPI()
+
+
+ @app.post("/predict")
+ async def predict(issue: str, model: Model = Depends(get_model)):
+     accessibility_confidence, nonaccessibility_confidence = model.predict(issue)
+     return {"Accessibility": str(accessibility_confidence),
+             "Non-accessibility": str(nonaccessibility_confidence)}
accessibility_classifier/classifier/model.py ADDED
@@ -0,0 +1,114 @@
+ from transformers.modeling_outputs import SequenceClassifierOutput
+ from transformers import AlbertForSequenceClassification, AlbertTokenizer
+ import torch
+ import torch.nn.functional as F
+ import numpy as np
+
+
+ class AlbertForMultilabelSequenceClassification(AlbertForSequenceClassification):
+     def __init__(self, config):
+         super().__init__(config)
+
+     def forward(self,
+                 input_ids=None,
+                 attention_mask=None,
+                 token_type_ids=None,
+                 position_ids=None,
+                 head_mask=None,
+                 inputs_embeds=None,
+                 labels=None,
+                 output_attentions=None,
+                 output_hidden_states=None,
+                 return_dict=None):
+         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
+
+         outputs = self.albert(input_ids,
+                               attention_mask=attention_mask,
+                               token_type_ids=token_type_ids,
+                               position_ids=position_ids,
+                               head_mask=head_mask,
+                               inputs_embeds=inputs_embeds,
+                               output_attentions=output_attentions,
+                               output_hidden_states=output_hidden_states,
+                               return_dict=return_dict)
+
+         pooled_output = outputs[1]
+         pooled_output = self.dropout(pooled_output)
+         logits = self.classifier(pooled_output)
+
+         # Multilabel variant: BCE-with-logits replaces the single-label
+         # cross-entropy of the stock AlbertForSequenceClassification.
+         loss = None
+         if labels is not None:
+             loss_fct = torch.nn.BCEWithLogitsLoss()
+             loss = loss_fct(logits.view(-1, self.num_labels),
+                             labels.float().view(-1, self.num_labels))
+
+         if not return_dict:
+             output = (logits,) + outputs[2:]
+             return ((loss,) + output) if loss is not None else output
+
+         return SequenceClassifierOutput(loss=loss,
+                                         logits=logits,
+                                         hidden_states=outputs.hidden_states,
+                                         attentions=outputs.attentions)
+
+
+ class Model:
+     def __init__(self):
+         self.device = torch.device(
+             "cuda:0" if torch.cuda.is_available() else "cpu")
+
+         self.labels = ['Accessibility', 'Non-accessibility']
+         self.tokenizer = AlbertTokenizer.from_pretrained(
+             'albert-base-v2', do_lower_case=True)
+         classifier = AlbertForMultilabelSequenceClassification.from_pretrained(
+             'albert-base-v2',
+             output_attentions=False,
+             output_hidden_states=False,
+             num_labels=2
+         )
+
+         # Fine-tuned weights fetched via Git LFS / bin/download_model.
+         classifier.load_state_dict(
+             torch.load("assets/pytorch_model.bin", map_location=self.device))
+         classifier = classifier.eval()
+         self.classifier = classifier.to(self.device)
+
+     def predict(self, text):
+         encoded_text = self.tokenizer.encode_plus(
+             text,
+             max_length=30,
+             add_special_tokens=True,
+             return_token_type_ids=False,
+             padding='longest',
+             return_attention_mask=True,
+             return_tensors="pt",
+             truncation=True,
+         )
+
+         input_ids = encoded_text["input_ids"].to(self.device)
+         attention_mask = encoded_text["attention_mask"].to(self.device)
+
+         with torch.no_grad():
+             outputs = self.classifier(input_ids, attention_mask)
+
+         # Per-class confidences: softmax over the two logits.
+         all_predictions = F.softmax(
+             outputs.logits, dim=1).cpu().numpy().flatten()
+
+         # Top-scoring label (kept for reference; not returned).
+         label = self.labels[int(np.argmax(all_predictions))]
+
+         accessibility_prediction = all_predictions[0]
+         nonaccessibility_prediction = all_predictions[1]
+
+         return (accessibility_prediction, nonaccessibility_prediction)
+
+
+ model = Model()
+ # model.predict("this is an improvement")
+
+
+ def get_model():
+     return model
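
A short usage sketch of the module above (illustrative, not part of the commit). The model is a module-level singleton built at import time, so the weights load once per process; the input string below is a made-up example:

    from accessibility_classifier.classifier.model import get_model

    model = get_model()  # returns the singleton constructed at import time
    acc, non_acc = model.predict("Button has no aria-label")  # hypothetical issue text
    print(f"Accessibility={acc:.3f}, Non-accessibility={non_acc:.3f}")
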
assets/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:75b432cd7171f3d2ff0150c24641ce9b956165f5a73f439a1c0b56d4f963d1bf
+ size 46749762
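
Note that this is a Git LFS pointer, not the checkpoint itself: `torch.load` in model.py needs the real 46,749,762-byte file, so the weights must be fetched (git lfs pull, or bin/download_model below) before the server starts. A quick sanity check, as a sketch:

    import os

    # An un-fetched LFS pointer is a tiny text file; the real checkpoint
    # is ~46 MB per the pointer's size field.
    size = os.path.getsize("assets/pytorch_model.bin")
    assert size > 1_000_000, "Fetch the model weights before starting the server."
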
bin/download_model ADDED
@@ -0,0 +1,6 @@
+ #!/usr/bin/env python
+ import gdown
+
+ # gdown.download(url, output): the source URL is not in this commit; fill it in.
+ gdown.download("<model-url>",
+                "assets/pytorch_model.bin")
bin/start_server ADDED
@@ -0,0 +1,3 @@
+ #!/bin/bash
+
+ uvicorn --port 8081 accessibility_classifier.api:app
bin/test_request ADDED
@@ -0,0 +1,3 @@
+ #!/bin/bash
+
+ http POST "http://localhost:8081/predict" issue=="This is an accessibility issue"
requirements.txt ADDED
Binary file (1.26 kB).