bhoov committed on
Commit
ea2c8dc
1 Parent(s): a283b22

Finish documenting the API endpoints

Browse files
Files changed (2) hide show
  1. server/main.py +78 -75
  2. server/model_api.py +4 -0
server/main.py CHANGED
@@ -8,7 +8,7 @@ import utils.path_fixes as pf
8
  from utils.f import ifnone
9
 
10
  from data_processing import from_model
11
- from transformer_details import from_pretrained
12
 
13
  app = connexion.FlaskApp(__name__, static_folder="client/dist", specification_dir=".")
14
  flask_app = app.app
@@ -36,10 +36,24 @@ def send_static_client(path):
36
  ## CONNEXION API ##
37
  # ======================================================================
38
  def get_model_details(**request):
39
- model = request['model']
40
- deets = from_pretrained(model)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
 
42
- info = deets.model.config
43
  nlayers = info.num_hidden_layers
44
  nheads = info.num_attention_heads
45
 
@@ -53,9 +67,36 @@ def get_model_details(**request):
53
  "payload": payload_out,
54
  }
55
 
56
- def get_attention_and_meta(**request):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  model = request["model"]
58
- details = from_pretrained(model)
59
 
60
  sentence = request["sentence"]
61
  layer = int(request["layer"])
@@ -69,17 +110,42 @@ def get_attention_and_meta(**request):
69
  "payload": payload_out
70
  }
71
 
72
-
73
  def update_masked_attention(**request):
74
- """
75
- Return attention information from tokens and mask indices.
76
-
77
- Object: {"a" : {"sentence":__, "mask_inds"}, "b" : {...}}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  """
79
  payload = request["payload"]
80
 
81
  model = payload['model']
82
- details = from_pretrained(model)
83
 
84
  tokens = payload["tokens"]
85
  sentence = payload["sentence"]
@@ -101,69 +167,6 @@ def update_masked_attention(**request):
101
  "payload": payload_out,
102
  }
103
 
104
-
105
- def nearest_embedding_search(**request):
106
- """Return the token text and the metadata in JSON"""
107
- model = request["model"]
108
- corpus = request["corpus"]
109
-
110
- try:
111
- details = from_pretrained(model)
112
- except KeyError as e:
113
- return {'status': 405, "payload": None}
114
-
115
- try:
116
- cc = from_model(model, corpus)
117
- except FileNotFoundError as e:
118
- return {
119
- "status": 406,
120
- "payload": None
121
- }
122
-
123
- q = np.array(request["embedding"]).reshape((1, -1)).astype(np.float32)
124
- layer = int(request["layer"])
125
- heads = list(map(int, list(set(request["heads"]))))
126
- k = int(request["k"])
127
-
128
- out = cc.search_embeddings(layer, q, k)
129
-
130
- payload_out = [o.to_json(layer, heads) for o in out]
131
-
132
- return {
133
- "status": 200,
134
- "payload": payload_out
135
- }
136
-
137
-
138
- def nearest_context_search(**request):
139
- """Return the token text and the metadata in JSON"""
140
- model = request["model"]
141
- corpus = request["corpus"]
142
- print("CORPUS: ", corpus)
143
-
144
- try:
145
- details = from_pretrained(model)
146
- except KeyError as e:
147
- return {'status': 405, "payload": None}
148
-
149
- try:
150
- cc = from_model(model, corpus)
151
- except FileNotFoundError as e:
152
- return {'status': 406, "payload": None}
153
-
154
- q = np.array(request["context"]).reshape((1, -1)).astype(np.float32)
155
- layer = int(request["layer"])
156
- heads = list(map(int, list(set(request["heads"]))))
157
- k = int(request["k"])
158
-
159
- out = cc.search_contexts(layer, heads, q, k)
160
- payload_out = [o.to_json(layer, heads) for o in out]
161
-
162
- return {
163
- "status": 200,
164
- "payload": payload_out,
165
- }
166
-
167
  app.add_api("swagger.yaml")
168
 
169
  # Setup code
 
8
  from utils.f import ifnone
9
 
10
  from data_processing import from_model
11
+ from model_api import get_details
12
 
13
  app = connexion.FlaskApp(__name__, static_folder="client/dist", specification_dir=".")
14
  flask_app = app.app
 
36
  ## CONNEXION API ##
37
  # ======================================================================
38
  def get_model_details(**request):
39
+ """Get important information about a model, like the number of layers and heads
40
+
41
+ Args:
42
+ request['model']: The model name
43
+
44
+ Returns:
45
+ {
46
+ status: 200,
47
+ payload: {
48
+ nlayers (int)
49
+ nheads (int)
50
+ }
51
+ }
52
+ """
53
+ mname = request['model']
54
+ deets = get_details(mname)
55
 
56
+ info = deets.config
57
  nlayers = info.num_hidden_layers
58
  nheads = info.num_attention_heads
59
 
 
67
  "payload": payload_out,
68
  }
69
 
70
+ def get_attentions_and_preds(**request):
71
+ """For a sentence, at a layer, get the attentions and predictions
72
+
73
+ Args:
74
+ request['model']: Model name
75
+ request['sentence']: Sentence to get the attentions for
76
+ request['layer']: Which layer to extract from
77
+
78
+ Returns:
79
+ {
80
+ status: 200
81
+ payload: {
82
+ aa: {
83
+ att: Array((nheads, ntoks, ntoks))
84
+ left: [{
85
+ text (str),
86
+ topk_words (List[str]),
87
+ topk_probs (List[float])
88
+ }, ...]
89
+ right: [{
90
+ text (str),
91
+ topk_words (List[str]),
92
+ topk_probs (List[float])
93
+ }, ...]
94
+ }
95
+ }
96
+ }
97
+ """
98
  model = request["model"]
99
+ details = get_details(model)
100
 
101
  sentence = request["sentence"]
102
  layer = int(request["layer"])
 
110
  "payload": payload_out
111
  }
112
 
 
113
  def update_masked_attention(**request):
114
+ """From tokens and indices of what should be masked, get the attentions and predictions
115
+
116
+ payload = request['payload']
117
+
118
+ Args:
119
+ payload['model'] (str): Model name
120
+ payload['tokens'] (List[str]): Tokens to pass through the model
121
+ payload['sentence'] (str): Original sentence the tokens came from
122
+ payload['mask'] (List[int]): Which indices to mask
123
+ payload['layer'] (int): Which layer to extract information from
124
+
125
+ Returns:
126
+ {
127
+ status: 200
128
+ payload: {
129
+ aa: {
130
+ att: Array((nheads, ntoks, ntoks))
131
+ left: [{
132
+ text (str),
133
+ topk_words (List[str]),
134
+ topk_probs (List[float])
135
+ }, ...]
136
+ right: [{
137
+ text (str),
138
+ topk_words (List[str]),
139
+ topk_probs (List[float])
140
+ }, ...]
141
+ }
142
+ }
143
+ }
144
  """
145
  payload = request["payload"]
146
 
147
  model = payload['model']
148
+ details = get_details(model)
149
 
150
  tokens = payload["tokens"]
151
  sentence = payload["sentence"]
 
167
  "payload": payload_out,
168
  }
169
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
170
  app.add_api("swagger.yaml")
171
 
172
  # Setup code
server/model_api.py CHANGED
@@ -6,6 +6,10 @@ from transformers import AutoConfig, AutoTokenizer, AutoModelWithLMHead, AutoMod
6
  from transformer_formatter import TransformerOutputFormatter
7
  from utils.f import delegates, pick, memoize
8
 
 
 
 
 
9
  def get_model_tok(mname):
10
  conf = AutoConfig.from_pretrained(mname, output_attentions=True, output_past=False)
11
  tok = AutoTokenizer.from_pretrained(mname, config=conf)
 
6
  from transformer_formatter import TransformerOutputFormatter
7
  from utils.f import delegates, pick, memoize
8
 
9
+ @memoize
10
+ def get_details(mname):
11
+ return ModelDetails(mname)
12
+
13
  def get_model_tok(mname):
14
  conf = AutoConfig.from_pretrained(mname, output_attentions=True, output_past=False)
15
  tok = AutoTokenizer.from_pretrained(mname, config=conf)