htahir1 committed
Commit e00e050
1 Parent(s): 84999d8

Upload folder using huggingface_hub

__pycache__/__init__.cpython-38.pyc ADDED
Binary file (178 Bytes)

__pycache__/aws_helper.cpython-38.pyc ADDED
Binary file (917 Bytes)
app.py CHANGED
@@ -13,12 +13,16 @@
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  # See the License for the specific language governing permissions and
  # limitations under the License.
+ import os
  from os.path import dirname
  from typing import Optional
 
  import click
  import numpy as np
+ import sagemaker
+ from aws_helper import get_sagemaker_session
  from transformers import AutoModelForSequenceClassification, AutoTokenizer
+ from zenml.client import Client
 
  import gradio as gr
 
@@ -26,11 +30,11 @@ import gradio as gr
  @click.command()
  @click.option(
      "--tokenizer_name_or_path",
-     default="tokenizer",
+     default=None,
      help="Name or the path of the tokenizer.",
  )
  @click.option(
-     "--model_name_or_path", default="model", help="Name or the path of the model."
+     "--model_name_or_path", default=None, help="Name or the path of the model."
  )
  @click.option(
      "--labels", default="Negative,Positive", help="Comma-separated list of labels."
@@ -78,7 +82,6 @@ def sentiment_analysis(
          examples (str): Comma-separated list of examples to show in the Gradio interface.
      """
      labels = labels.split(",")
-     examples = [examples]
 
      def preprocess(text: str) -> str:
          """Preprocesses the text.
@@ -100,31 +103,60 @@
          e_x = np.exp(x - np.max(x))
          return e_x / e_x.sum(axis=0)
 
-     def analyze_text(text):
-         model_path = f"{dirname(__file__)}/{model_name_or_path}/"
-         print(f"Loading model from {model_path}")
-         tokenizer_path = f"{dirname(__file__)}/{tokenizer_name_or_path}/"
-         print(f"Loading tokenizer from {tokenizer_path}")
-         tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
-         model = AutoModelForSequenceClassification.from_pretrained(model_path)
-
-         text = preprocess(text)
-         encoded_input = tokenizer(text, return_tensors="pt")
-         output = model(**encoded_input)
-         scores_ = output[0][0].detach().numpy()
-         scores_ = softmax(scores_)
-
-         scores = {l: float(s) for (l, s) in zip(labels, scores_)}
+     def analyze_text(inference_type, text):
+         if inference_type == "local":
+             cur_path = os.path.abspath(dirname(__file__))
+             model_path, tokenizer_path = cur_path, cur_path
+             if model_name_or_path:
+                 model_path = f"{dirname(__file__)}/{model_name_or_path}/"
+             print(f"Loading model from {model_path}")
+             if tokenizer_name_or_path:
+                 tokenizer_path = f"{dirname(__file__)}/{tokenizer_name_or_path}/"
+             print(f"Loading tokenizer from {tokenizer_path}")
+             tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
+             model = AutoModelForSequenceClassification.from_pretrained(model_path)
+
+             text = preprocess(text)
+             encoded_input = tokenizer(text, return_tensors="pt")
+             output = model(**encoded_input)
+             scores_ = output[0][0].detach().numpy()
+             scores_ = softmax(scores_)
+             scores = {l: float(s) for (l, s) in zip(labels, scores_)}
+         else:
+             client = Client()
+             latest_run = client.get_pipeline("nlp_use_case_deploy_pipeline").runs[0]
+             endpoint_name = (
+                 latest_run.steps["deploy_hf_to_sagemaker"]
+                 .outputs["sagemaker_endpoint_name"]
+                 .load()
+             )
+
+             predictor = sagemaker.Predictor(
+                 endpoint_name=endpoint_name,
+                 sagemaker_session=get_sagemaker_session(),
+                 serializer=sagemaker.serializers.JSONSerializer(),
+                 deserializer=sagemaker.deserializers.JSONDeserializer(),
+             )
+             res = predictor.predict({"inputs": text})
+             if res[0]["label"] == "LABEL_1":
+                 scores = {"Negative": 1 - res[0]["score"], "Positive": res[0]["score"]}
+             else:
+                 scores = {"Negative": res[0]["score"], "Positive": 1 - res[0]["score"]}
+
          return scores
 
      demo = gr.Interface(
          fn=analyze_text,
-         inputs=[gr.TextArea("Write your text or tweet here", label="Analyze Text")],
+         inputs=[
+             gr.Dropdown(
+                 ["local", "sagemaker"], label="Select inference type", value="sagemaker"
+             ),
+             gr.TextArea("Write your text or tweet here", label="Analyze Text"),
+         ],
          outputs=["label"],
          title=title,
          description=description,
          interpretation=interpretation,
-         examples=examples,
      )
 
      demo.launch(share=True, debug=True)
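
With the new None defaults, the "local" branch of analyze_text loads the model and tokenizer from the files sitting next to app.py, i.e. this repository's root. A minimal sketch of that local scoring path outside Gradio follows; it is not part of this commit, the example sentence is illustrative, and the "." paths assume the script is run from the repo root:

# Hypothetical sketch of the local inference path; nothing below is committed code.
import numpy as np
from transformers import AutoModelForSequenceClassification, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(".")  # repo root holds tokenizer.json etc.
model = AutoModelForSequenceClassification.from_pretrained(".")  # and pytorch_model.bin
encoded = tokenizer("the new dropdown makes testing easier", return_tensors="pt")
logits = model(**encoded)[0][0].detach().numpy()
probs = np.exp(logits - logits.max()) / np.exp(logits - logits.max()).sum()
print({label: float(p) for label, p in zip(["Negative", "Positive"], probs)})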
aws_helper.py ADDED
@@ -0,0 +1,26 @@
+ import os
+
+ import boto3
+ import sagemaker
+
+ REGION_NAME = "us-east-1"
+ ROLE_NAME = "hamza_connector"
+ os.environ["AWS_DEFAULT_REGION"] = REGION_NAME
+
+ auth_arguments = {
+     "aws_access_key_id": os.environ["AWS_ACCESS_KEY_ID"],
+     "aws_secret_access_key": os.environ["AWS_SECRET_ACCESS_KEY"],
+     "aws_session_token": os.environ["AWS_SESSION_TOKEN"],
+     "region_name": REGION_NAME,
+ }
+
+
+ def get_sagemaker_role():
+     iam = boto3.client("iam", **auth_arguments)
+     role = iam.get_role(RoleName=ROLE_NAME)["Role"]["Arn"]
+     return role
+
+
+ def get_sagemaker_session():
+     session = sagemaker.Session(boto3.Session(**auth_arguments))
+     return session
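
A hedged sketch of how these helpers could be combined to stand up the SageMaker endpoint that app.py and predict.py query; the S3 path, instance type, and container versions below are assumptions, not values taken from this repository:

# Hypothetical usage of aws_helper.py; none of the literals below come from this commit.
from sagemaker.huggingface import HuggingFaceModel

from aws_helper import get_sagemaker_role, get_sagemaker_session

hf_model = HuggingFaceModel(
    model_data="s3://example-bucket/nlp_use_case_model.tar.gz",  # assumed artifact location
    role=get_sagemaker_role(),
    sagemaker_session=get_sagemaker_session(),
    transformers_version="4.28",  # assumed Hugging Face DLC versions
    pytorch_version="2.0",
    py_version="py310",
)
predictor = hf_model.deploy(initial_instance_count=1, instance_type="ml.m5.xlarge")
print(predictor.endpoint_name)  # the name app.py later resolves via ZenML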
config.json CHANGED
@@ -1,5 +1,5 @@
  {
-   "_name_or_path": "/home/htahir1/.config/zenml/local_stores/21f121c8-d94d-420d-9661-b47d14a4c156/mlruns/174375914402683362/d6a66d05e30d4f2c8de9aa6245149295/artifacts/nlp_use_case_model/model",
+   "_name_or_path": "/home/htahir1/.config/zenml/local_stores/3eaeb016-f09b-41e8-8922-f7a9825e5ea9/mlruns/272458645716496394/f3a2d4bf4b2a4b9791712816097b46b1/artifacts/nlp_use_case_model/model",
    "architectures": [
      "RobertaForSequenceClassification"
    ],
@@ -21,7 +21,7 @@
    "position_embedding_type": "absolute",
    "problem_type": "single_label_classification",
    "torch_dtype": "float32",
-   "transformers_version": "4.34.1",
+   "transformers_version": "4.28.1",
    "type_vocab_size": 1,
    "use_cache": true,
    "vocab_size": 50265
flagged/log.csv ADDED
@@ -0,0 +1,2 @@
+ Select inference type,Analyze Text,output,flag,username,timestamp
+ local,there are times when zenml is helpful and other times when i wish I never used it,/home/htahir1/workspace/zenml-plugins/huggingface_sagemaker_deployer/gradio/flagged/output/tmp277x8crj.json,,,2023-11-02 16:12:37.995984
flagged/output/tmp277x8crj.json ADDED
@@ -0,0 +1 @@
+ {"label": "Negative", "confidences": [{"label": "Negative", "confidence": 0.8703942894935608}, {"label": "Positive", "confidence": 0.12960568070411682}]}
predict.py ADDED
@@ -0,0 +1,17 @@
+ import sagemaker
+ from aws_helper import get_sagemaker_session
+
+ # artifact = Client().get_artifact('54507ebb-5e1c-4283-ad39-8e8558eab289')
+ # endpoint_name = artifact.load()
+ endpoint_name = "huggingface-pytorch-inference-2023-11-02-13-25-13-481"
+
+ predictor = sagemaker.Predictor(
+     endpoint_name=endpoint_name,
+     sagemaker_session=get_sagemaker_session(),
+     serializer=sagemaker.serializers.JSONSerializer(),
+     deserializer=sagemaker.deserializers.JSONDeserializer(),
+ )
+ res = predictor.predict({"inputs": "what is this "})
+ # #probabilities, labels, predicted_label = parse_response(res)
+ # print(probabilities, labels, predicted_label)
+ print(res)
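
The parse_response helper referenced in the commented-out lines above is not included in this commit; a minimal sketch of what it might look like, assuming the endpoint returns a list of {"label", "score"} dicts as app.py expects:

# Hypothetical helper matching the commented-out parse_response call above.
def parse_response(res):
    labels = ["Negative", "Positive"]
    score = res[0]["score"]
    # app.py treats LABEL_1 as Positive and anything else as Negative.
    positive = score if res[0]["label"] == "LABEL_1" else 1.0 - score
    probabilities = {"Negative": 1.0 - positive, "Positive": positive}
    predicted_label = max(probabilities, key=probabilities.get)
    return probabilities, labels, predicted_label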
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:5ca6368ea5443d418abe88b1d23b1ecc729ff995876dab18596c2fce61a25f9c
- size 498655278
+ oid sha256:ed915719b7eef131898cd0bda38c5dd350caf05ad4729b9cfdf0845529a92527
+ size 498659698
special_tokens_map.json CHANGED
@@ -1,25 +1,7 @@
  {
-   "bos_token": {
-     "content": "<s>",
-     "lstrip": false,
-     "normalized": true,
-     "rstrip": false,
-     "single_word": false
-   },
-   "cls_token": {
-     "content": "<s>",
-     "lstrip": false,
-     "normalized": true,
-     "rstrip": false,
-     "single_word": false
-   },
-   "eos_token": {
-     "content": "</s>",
-     "lstrip": false,
-     "normalized": true,
-     "rstrip": false,
-     "single_word": false
-   },
+   "bos_token": "<s>",
+   "cls_token": "<s>",
+   "eos_token": "</s>",
    "mask_token": {
      "content": "<mask>",
      "lstrip": true,
@@ -27,25 +9,7 @@
      "rstrip": false,
      "single_word": false
    },
-   "pad_token": {
-     "content": "<pad>",
-     "lstrip": false,
-     "normalized": true,
-     "rstrip": false,
-     "single_word": false
-   },
-   "sep_token": {
-     "content": "</s>",
-     "lstrip": false,
-     "normalized": true,
-     "rstrip": false,
-     "single_word": false
-   },
-   "unk_token": {
-     "content": "<unk>",
-     "lstrip": false,
-     "normalized": true,
-     "rstrip": false,
-     "single_word": false
-   }
+   "pad_token": "<pad>",
+   "sep_token": "</s>",
+   "unk_token": "<unk>"
  }
tokenizer.json CHANGED
@@ -9,7 +9,7 @@
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
-     "normalized": true,
+     "normalized": false,
      "special": true
    },
    {
@@ -18,7 +18,7 @@
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
-     "normalized": true,
+     "normalized": false,
      "special": true
    },
    {
@@ -27,7 +27,7 @@
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
-     "normalized": true,
+     "normalized": false,
      "special": true
    },
    {
@@ -36,7 +36,7 @@
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
-     "normalized": true,
+     "normalized": false,
      "special": true
    },
    {
tokenizer_config.json CHANGED
@@ -1,47 +1,5 @@
  {
    "add_prefix_space": false,
-   "added_tokens_decoder": {
-     "0": {
-       "content": "<s>",
-       "lstrip": false,
-       "normalized": true,
-       "rstrip": false,
-       "single_word": false,
-       "special": true
-     },
-     "1": {
-       "content": "<pad>",
-       "lstrip": false,
-       "normalized": true,
-       "rstrip": false,
-       "single_word": false,
-       "special": true
-     },
-     "2": {
-       "content": "</s>",
-       "lstrip": false,
-       "normalized": true,
-       "rstrip": false,
-       "single_word": false,
-       "special": true
-     },
-     "3": {
-       "content": "<unk>",
-       "lstrip": false,
-       "normalized": true,
-       "rstrip": false,
-       "single_word": false,
-       "special": true
-     },
-     "50264": {
-       "content": "<mask>",
-       "lstrip": true,
-       "normalized": false,
-       "rstrip": false,
-       "single_word": false,
-       "special": true
-     }
-   },
    "bos_token": "<s>",
    "clean_up_tokenization_spaces": true,
    "cls_token": "<s>",