Upload folder using huggingface_hub

Browse files

Files changed (12)

__pycache__/__init__.cpython-38.pyc +0 -0
__pycache__/aws_helper.cpython-38.pyc +0 -0
app.py +52 -20
aws_helper.py +26 -0
config.json +2 -2
flagged/log.csv +2 -0
flagged/output/tmp277x8crj.json +1 -0
predict.py +17 -0
pytorch_model.bin +2 -2
special_tokens_map.json +6 -42
tokenizer.json +4 -4
tokenizer_config.json +0 -42

__pycache__/__init__.cpython-38.pyc ADDED Viewed

Binary file (178 Bytes). View file

__pycache__/aws_helper.cpython-38.pyc ADDED Viewed

Binary file (917 Bytes). View file

app.py CHANGED Viewed

@@ -13,12 +13,16 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from os.path import dirname
 from typing import Optional
 import click
 import numpy as np
 from transformers import AutoModelForSequenceClassification, AutoTokenizer
 import gradio as gr
@@ -26,11 +30,11 @@ import gradio as gr
 @click.command()
 @click.option(
     "--tokenizer_name_or_path",
-    default="tokenizer",
     help="Name or the path of the tokenizer.",
 )
 @click.option(
-    "--model_name_or_path", default="model", help="Name or the path of the model."
 )
 @click.option(
     "--labels", default="Negative,Positive", help="Comma-separated list of labels."
@@ -78,7 +82,6 @@ def sentiment_analysis(
         examples (str): Comma-separated list of examples to show in the Gradio interface.
     """
     labels = labels.split(",")
-    examples = [examples]
     def preprocess(text: str) -> str:
         """Preprocesses the text.
@@ -100,31 +103,60 @@ def sentiment_analysis(
         e_x = np.exp(x - np.max(x))
         return e_x / e_x.sum(axis=0)
-    def analyze_text(text):
-        model_path = f"{dirname(__file__)}/{model_name_or_path}/"
-        print(f"Loading model from {model_path}")
-        tokenizer_path = f"{dirname(__file__)}/{tokenizer_name_or_path}/"
-        print(f"Loading tokenizer from {tokenizer_path}")
-        tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
-        model = AutoModelForSequenceClassification.from_pretrained(model_path)
-        text = preprocess(text)
-        encoded_input = tokenizer(text, return_tensors="pt")
-        output = model(**encoded_input)
-        scores_ = output[0][0].detach().numpy()
-        scores_ = softmax(scores_)
-        scores = {l: float(s) for (l, s) in zip(labels, scores_)}
         return scores
     demo = gr.Interface(
         fn=analyze_text,
-        inputs=[gr.TextArea("Write your text or tweet here", label="Analyze Text")],
         outputs=["label"],
         title=title,
         description=description,
         interpretation=interpretation,
-        examples=examples,
     )
     demo.launch(share=True, debug=True)

 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import os
 from os.path import dirname
 from typing import Optional
 import click
 import numpy as np
+import sagemaker
+from aws_helper import get_sagemaker_session
 from transformers import AutoModelForSequenceClassification, AutoTokenizer
+from zenml.client import Client
 import gradio as gr
 @click.command()
 @click.option(
     "--tokenizer_name_or_path",
+    default=None,
     help="Name or the path of the tokenizer.",
 )
 @click.option(
+    "--model_name_or_path", default=None, help="Name or the path of the model."
 )
 @click.option(
     "--labels", default="Negative,Positive", help="Comma-separated list of labels."
         examples (str): Comma-separated list of examples to show in the Gradio interface.
     """
     labels = labels.split(",")
     def preprocess(text: str) -> str:
         """Preprocesses the text.
         e_x = np.exp(x - np.max(x))
         return e_x / e_x.sum(axis=0)
+    def analyze_text(inference_type, text):
+        if inference_type == "local":
+            cur_path = os.path.abspath(dirname(__file__))
+            model_path, tokenizer_path = cur_path, cur_path
+            if model_name_or_path:
+                model_path = f"{dirname(__file__)}/{model_name_or_path}/"
+            print(f"Loading model from {model_path}")
+            if tokenizer_name_or_path:
+                tokenizer_path = f"{dirname(__file__)}/{tokenizer_name_or_path}/"
+            print(f"Loading tokenizer from {tokenizer_path}")
+            tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
+            model = AutoModelForSequenceClassification.from_pretrained(model_path)
+            text = preprocess(text)
+            encoded_input = tokenizer(text, return_tensors="pt")
+            output = model(**encoded_input)
+            scores_ = output[0][0].detach().numpy()
+            scores_ = softmax(scores_)
+            scores = {l: float(s) for (l, s) in zip(labels, scores_)}
+        else:
+            client = Client()
+            latest_run = client.get_pipeline("nlp_use_case_deploy_pipeline").runs[0]
+            endpoint_name = (
+                latest_run.steps["deploy_hf_to_sagemaker"]
+                .outputs["sagemaker_endpoint_name"]
+                .load()
+            )
+            predictor = sagemaker.Predictor(
+                endpoint_name=endpoint_name,
+                sagemaker_session=get_sagemaker_session(),
+                serializer=sagemaker.serializers.JSONSerializer(),
+                deserializer=sagemaker.deserializers.JSONDeserializer(),
+            )
+            res = predictor.predict({"inputs": text})
+            if res[0]["label"] == "LABEL_1":
+                scores = {"Negative": 1 - res[0]["score"], "Positive": res[0]["score"]}
+            else:
+                scores = {"Negative": res[0]["score"], "Positive": 1 - res[0]["score"]}
         return scores
     demo = gr.Interface(
         fn=analyze_text,
+        inputs=[
+            gr.Dropdown(
+                ["local", "sagemaker"], label="Select inference type", value="sagemaker"
+            ),
+            gr.TextArea("Write your text or tweet here", label="Analyze Text"),
+        ],
         outputs=["label"],
         title=title,
         description=description,
         interpretation=interpretation,
     )
     demo.launch(share=True, debug=True)

aws_helper.py ADDED Viewed

	@@ -0,0 +1,26 @@

+import os
+import boto3
+import sagemaker
+REGION_NAME = "us-east-1"
+ROLE_NAME = "hamza_connector"
+os.environ["AWS_DEFAULT_REGION"] = REGION_NAME
+auth_arguments = {
+    "aws_access_key_id": os.environ["AWS_ACCESS_KEY_ID"],
+    "aws_secret_access_key": os.environ["AWS_SECRET_ACCESS_KEY"],
+    "aws_session_token": os.environ["AWS_SESSION_TOKEN"],
+    "region_name": REGION_NAME,
+}
+def get_sagemaker_role():
+    iam = boto3.client("iam", **auth_arguments)
+    role = iam.get_role(RoleName=ROLE_NAME)["Role"]["Arn"]
+    return role
+def get_sagemaker_session():
+    session = sagemaker.Session(boto3.Session(**auth_arguments))
+    return session

config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "/home/htahir1/.config/zenml/local_stores/21f121c8-d94d-420d-9661-b47d14a4c156/mlruns/174375914402683362/d6a66d05e30d4f2c8de9aa6245149295/artifacts/nlp_use_case_model/model",
   "architectures": [
     "RobertaForSequenceClassification"
   ],
@@ -21,7 +21,7 @@
   "position_embedding_type": "absolute",
   "problem_type": "single_label_classification",
   "torch_dtype": "float32",
-  "transformers_version": "4.34.1",
   "type_vocab_size": 1,
   "use_cache": true,
   "vocab_size": 50265

 {
+  "_name_or_path": "/home/htahir1/.config/zenml/local_stores/3eaeb016-f09b-41e8-8922-f7a9825e5ea9/mlruns/272458645716496394/f3a2d4bf4b2a4b9791712816097b46b1/artifacts/nlp_use_case_model/model",
   "architectures": [
     "RobertaForSequenceClassification"
   ],
   "position_embedding_type": "absolute",
   "problem_type": "single_label_classification",
   "torch_dtype": "float32",
+  "transformers_version": "4.28.1",
   "type_vocab_size": 1,
   "use_cache": true,
   "vocab_size": 50265

flagged/log.csv ADDED Viewed

	@@ -0,0 +1,2 @@


+Select inference type,Analyze Text,output,flag,username,timestamp
+local,there are times when zenml is helpful and other times when i wish I never used it,/home/htahir1/workspace/zenml-plugins/huggingface_sagemaker_deployer/gradio/flagged/output/tmp277x8crj.json,,,2023-11-02 16:12:37.995984

flagged/output/tmp277x8crj.json ADDED Viewed

	@@ -0,0 +1 @@


+{"label": "Negative", "confidences": [{"label": "Negative", "confidence": 0.8703942894935608}, {"label": "Positive", "confidence": 0.12960568070411682}]}

predict.py ADDED Viewed

	@@ -0,0 +1,17 @@

+import sagemaker
+from aws_helper import get_sagemaker_session
+# artifact = Client().get_artifact('54507ebb-5e1c-4283-ad39-8e8558eab289')
+# endpoint_name = artifact.load()
+endpoint_name = "huggingface-pytorch-inference-2023-11-02-13-25-13-481"
+predictor = sagemaker.Predictor(
+    endpoint_name=endpoint_name,
+    sagemaker_session=get_sagemaker_session(),
+    serializer=sagemaker.serializers.JSONSerializer(),
+    deserializer=sagemaker.deserializers.JSONDeserializer(),
+)
+res = predictor.predict({"inputs": "what is this "})
+# #probabilities, labels, predicted_label = parse_response(res)
+# print(probabilities, labels, predicted_label)
+print(res)

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5ca6368ea5443d418abe88b1d23b1ecc729ff995876dab18596c2fce61a25f9c
-size 498655278

 version https://git-lfs.github.com/spec/v1
+oid sha256:ed915719b7eef131898cd0bda38c5dd350caf05ad4729b9cfdf0845529a92527
+size 498659698

special_tokens_map.json CHANGED Viewed

@@ -1,25 +1,7 @@
 {
-  "bos_token": {
-    "content": "<s>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
-  "cls_token": {
-    "content": "<s>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
-  "eos_token": {
-    "content": "</s>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
   "mask_token": {
     "content": "<mask>",
     "lstrip": true,
@@ -27,25 +9,7 @@
     "rstrip": false,
     "single_word": false
   },
-  "pad_token": {
-    "content": "<pad>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
-  "sep_token": {
-    "content": "</s>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
-  "unk_token": {
-    "content": "<unk>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  }
 }

 {
+  "bos_token": "<s>",
+  "cls_token": "<s>",
+  "eos_token": "</s>",
   "mask_token": {
     "content": "<mask>",
     "lstrip": true,
     "rstrip": false,
     "single_word": false
   },
+  "pad_token": "<pad>",
+  "sep_token": "</s>",
+  "unk_token": "<unk>"
 }

tokenizer.json CHANGED Viewed

@@ -9,7 +9,7 @@
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
-      "normalized": true,
       "special": true
     },
     {
@@ -18,7 +18,7 @@
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
-      "normalized": true,
       "special": true
     },
     {
@@ -27,7 +27,7 @@
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
-      "normalized": true,
       "special": true
     },
     {
@@ -36,7 +36,7 @@
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
-      "normalized": true,
       "special": true
     },
     {

       "single_word": false,
       "lstrip": false,
       "rstrip": false,
+      "normalized": false,
       "special": true
     },
     {
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
+      "normalized": false,
       "special": true
     },
     {
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
+      "normalized": false,
       "special": true
     },
     {
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
+      "normalized": false,
       "special": true
     },
     {

tokenizer_config.json CHANGED Viewed

@@ -1,47 +1,5 @@
 {
   "add_prefix_space": false,
-  "added_tokens_decoder": {
-    "0": {
-      "content": "<s>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "1": {
-      "content": "<pad>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "2": {
-      "content": "</s>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "3": {
-      "content": "<unk>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "50264": {
-      "content": "<mask>",
-      "lstrip": true,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    }
-  },
   "bos_token": "<s>",
   "clean_up_tokenization_spaces": true,
   "cls_token": "<s>",

 {
   "add_prefix_space": false,
   "bos_token": "<s>",
   "clean_up_tokenization_spaces": true,
   "cls_token": "<s>",