David-ipynb committed on
Commit
36da459
1 Parent(s): 4cd19be

Initial commit. Trained model, Flask web app, Docker container

Browse files
.gitattributes CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ models/model filter=lfs diff=lfs merge=lfs -text
37
+ training/checkpoints/model_epoch_1 filter=lfs diff=lfs merge=lfs -text
38
+ training/checkpoints/model_epoch_2 filter=lfs diff=lfs merge=lfs -text
39
+ training/checkpoints/model_epoch_3 filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM arm64v8/python:3.8-slim-buster

WORKDIR /app

COPY requirements.txt /app/

# Install the native C++ runtime that the PyTorch wheel links against,
# then the Python dependencies, and clean every cache inside the SAME
# layer — otherwise the apt lists and pip cache are baked into the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends libstdc++6 \
    && pip install --no-cache-dir --trusted-host pypi.python.org -r requirements.txt \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

COPY app /app/app/

COPY models /app/models

# Flask app listens on 9696 (see app/app.py).
EXPOSE 9696

CMD ["python", "app/app.py"]
app/__pycache__/app.cpython-39.pyc ADDED
Binary file (250 Bytes). View file
 
app/__pycache__/main.cpython-39.pyc ADDED
Binary file (264 Bytes). View file
 
app/__pycache__/routes.cpython-39.pyc ADDED
Binary file (926 Bytes). View file
 
app/__pycache__/utils.cpython-39.pyc ADDED
Binary file (1.65 kB). View file
 
app/app.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, jsonify, request
2
+ from utils import predict_single, predict_batch
3
+
4
+
5
+ app = Flask(__name__)
6
+
7
+
8
+
9
@app.route('/')
@app.route('/home')
def status():
    """Health-check endpoint: report that the service is running."""
    payload = {'status': 'ok'}
    return jsonify(payload)
13
+
14
+
15
@app.route('/predict', methods=['POST'])
def predict():
    """Classify one or more tweets posted as JSON.

    Expects a body of the form ``{"text": ["tweet 1", ...]}`` and returns
    the inputs together with one predicted label per tweet.
    """
    # silent=True makes get_json() return None (instead of raising) on a
    # missing or malformed JSON body, so we can answer with a clean 400
    # rather than crashing with a TypeError on the membership test below.
    data = request.get_json(silent=True)

    if data is None or 'text' not in data:
        return jsonify({'error': 'Missing "text" parameter'}), 400

    tweets = data['text']

    if len(tweets) == 1:
        response = predict_single(tweets[0])
    elif len(tweets) > 1:
        response = predict_batch(tweets)
    else:
        return jsonify({'error': 'Zero text strings posted'}), 400

    return jsonify({
        'inputs': tweets,
        'predictions': response
    })
35
+
36
+
37
+
38
if __name__ == '__main__':
    # Port can be overridden via $PORT; the default 9696 matches the
    # Dockerfile's EXPOSE, so existing deployments are unaffected.
    # NOTE(review): debug=True enables the Werkzeug interactive debugger
    # (arbitrary code execution) — confirm this is never exposed publicly.
    import os
    app.run(debug=True, host='0.0.0.0', port=int(os.environ.get('PORT', '9696')))
app/routes.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, jsonify, request
2
+ from utils import predict_single, predict_batch
3
+
4
+ app = Flask(__name__)
5
+
6
+
7
@app.route('/')
@app.route('/home')
def status():
    """Health-check endpoint: report that the service is running."""
    payload = {'status': 'ok'}
    return jsonify(payload)
11
+
12
+
13
@app.route('/predict', methods=['POST'])
def predict():
    """Classify one or more tweets posted as JSON.

    Expects a body of the form ``{"text": ["tweet 1", ...]}`` and returns
    the inputs together with one predicted label per tweet.
    """
    # silent=True makes get_json() return None (instead of raising) on a
    # missing or malformed JSON body, so we can answer with a clean 400
    # rather than crashing with a TypeError on the membership test below.
    data = request.get_json(silent=True)

    if data is None or 'text' not in data:
        return jsonify({'error': 'Missing "text" parameter'}), 400

    tweets = data['text']

    if len(tweets) == 1:
        response = predict_single(tweets[0])
    elif len(tweets) > 1:
        response = predict_batch(tweets)
    else:
        return jsonify({'error': 'Zero text strings posted'}), 400

    return jsonify({
        'inputs': tweets,
        'predictions': response
    })
app/utils.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn.functional as F
3
+ import pickle
4
+ import re
5
+
6
+
7
# Load the fine-tuned model onto CPU and switch it to inference mode so
# that dropout/normalisation layers behave deterministically at serving
# time (torch.load restores whatever mode the model was saved in).
model = torch.load("models/model", map_location='cpu')
model.eval()

# map_location='cpu' kept consistent with the model load above.
tokenizer = torch.load("models/tokenizer", map_location='cpu')

# Mapping from class index to human-readable emotion label.
# NOTE(review): pickle.load on an untrusted file executes arbitrary code;
# fine here because the artifact ships with the image.
with open("models/label_dict", 'rb') as file:
    label_dict = pickle.load(file)
12
+
13
def preprocess_string(tweet: str) -> str:
    """Normalise a raw tweet: trim, lowercase, and strip punctuation.

    Every character that is neither a word character nor whitespace is
    removed.
    """
    normalised = tweet.strip().lower()
    return re.sub(r'[^\w\s]', '', normalised)
17
+
18
def predict_single(tweet: str) -> str:
    """Classify a single tweet and return its predicted label.

    Args:
        tweet: Raw tweet text.

    Returns:
        The label from ``label_dict`` for the most probable class.
    """
    clean_tweet = preprocess_string(tweet)
    # 'inputs' (not 'input') avoids shadowing the builtin.
    inputs = tokenizer(clean_tweet, return_tensors='pt', truncation=True)
    # Inference only: disable autograd so no graph is built per request.
    with torch.no_grad():
        output = model(**inputs)
    # softmax is monotonic, so the max index over probabilities equals the
    # argmax over logits; kept in probability space for readability.
    pred = torch.max(F.softmax(output.logits, dim=-1), dim=-1)[1]
    return label_dict[pred.item()]
25
+
26
def predict_batch(tweets):
    """Classify a batch of tweets and return one label per tweet.

    Args:
        tweets: Non-empty iterable of raw tweet strings.

    Returns:
        List of predicted labels, aligned with the input order.
    """
    clean_tweets = [preprocess_string(tweet) for tweet in tweets]
    inputs = tokenizer(clean_tweets, return_tensors='pt', padding=True, truncation=True)
    # Inference only: disable autograd to save time and memory per request.
    with torch.no_grad():
        outputs = model(**inputs)
    preds = torch.max(F.softmax(outputs.logits, dim=-1), dim=-1)[1]
    return [label_dict[pred] for pred in preds.tolist()]
models/label_dict ADDED
Binary file (77 Bytes). View file
 
models/model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2f2873ded67e0c26f97306d9ba4d017a6f733692ea4614b16ca043add388976
3
+ size 267884825
models/tokenizer ADDED
Binary file (843 kB). View file
 
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ --find-links https://download.pytorch.org/whl/torch_stable.html
2
+ torch==1.9.0
3
+ transformers==4.26.1
4
+ flask==2.3.3
serve_predictions.ipynb ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import requests\n",
10
+ "import json"
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "code",
15
+ "execution_count": 2,
16
+ "metadata": {},
17
+ "outputs": [
18
+ {
19
+ "data": {
20
+ "text/plain": [
21
+ "{'inputs': [\"That's just so romantic, I guess you're really falling for her!\",\n",
22
+ " 'Never have I felt so disgusted in my life.',\n",
23
+ " 'The sun was shining, we had a great lunch and then took a hike - it was the greatest day of my life.',\n",
24
+ "  'It came out of nowhere, nobody was expecting that.'],\n",
25
+ " 'predictions': ['love', 'anger', 'joy', 'fear']}"
26
+ ]
27
+ },
28
+ "execution_count": 2,
29
+ "metadata": {},
30
+ "output_type": "execute_result"
31
+ }
32
+ ],
33
+ "source": [
34
+ "response = requests.post(\n",
35
+ " url='http://127.0.0.1:9696/predict',\n",
36
+ " json={'text': \n",
37
+ " [\"That's just so romantic, I guess you're really falling for her!\",\n",
38
+ " \"Never have I felt so disgusted in my life.\",\n",
39
+ " \"The sun was shining, we had a great lunch and then took a hike - it was the greatest day of my life.\",\n",
40
+ "        \"It came out of nowhere, nobody was expecting that.\"\n",
41
+ " ]}\n",
42
+ ")\n",
43
+ "\n",
44
+ "response = json.loads(response.text)\n",
45
+ "response"
46
+ ]
47
+ }
48
+ ],
49
+ "metadata": {
50
+ "kernelspec": {
51
+ "display_name": "hugging",
52
+ "language": "python",
53
+ "name": "python3"
54
+ },
55
+ "language_info": {
56
+ "codemirror_mode": {
57
+ "name": "ipython",
58
+ "version": 3
59
+ },
60
+ "file_extension": ".py",
61
+ "mimetype": "text/x-python",
62
+ "name": "python",
63
+ "nbconvert_exporter": "python",
64
+ "pygments_lexer": "ipython3",
65
+ "version": "3.9.7"
66
+ },
67
+ "orig_nbformat": 4
68
+ },
69
+ "nbformat": 4,
70
+ "nbformat_minor": 2
71
+ }
training/checkpoints/model_epoch_1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68c581923c7e0ce1a9708dab5506d500baaf36225727b40da5a7f89b8f58b528
3
+ size 267892337
training/checkpoints/model_epoch_2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4679e143d9c933638ff9c8c0762ed7a54f20f8bae5fe4c257599d4c872a9a0e2
3
+ size 267892337
training/checkpoints/model_epoch_3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6dfd452565645d50ba5247202448f9148dc51b1a57583e10af6303988249f51
3
+ size 267892337
training/development.ipynb ADDED
The diff for this file is too large to render. See raw diff