labeling
Browse files- .ipynb_checkpoints/Untitled-checkpoint.ipynb +6 -0
- Untitled.ipynb +122 -0
- app.py +12 -1
.ipynb_checkpoints/Untitled-checkpoint.ipynb
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [],
|
3 |
+
"metadata": {},
|
4 |
+
"nbformat": 4,
|
5 |
+
"nbformat_minor": 5
|
6 |
+
}
|
Untitled.ipynb
ADDED
@@ -0,0 +1,122 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": 1,
|
6 |
+
"id": "307086c9",
|
7 |
+
"metadata": {},
|
8 |
+
"outputs": [
|
9 |
+
{
|
10 |
+
"name": "stderr",
|
11 |
+
"output_type": "stream",
|
12 |
+
"text": [
|
13 |
+
"2023-04-24 15:59:18.802878: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
|
14 |
+
"To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
|
15 |
+
"2023-04-24 15:59:19.650995: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n"
|
16 |
+
]
|
17 |
+
}
|
18 |
+
],
|
19 |
+
"source": [
|
20 |
+
"from transformers import AutoTokenizer, pipeline\n",
|
21 |
+
"from transformers import AutoModelForSequenceClassification as Model\n"
|
22 |
+
]
|
23 |
+
},
|
24 |
+
{
|
25 |
+
"cell_type": "code",
|
26 |
+
"execution_count": 18,
|
27 |
+
"id": "d27100b6",
|
28 |
+
"metadata": {},
|
29 |
+
"outputs": [],
|
30 |
+
"source": [
|
31 |
+
"tokenizer = AutoTokenizer.from_pretrained(\"bert-base-uncased\")\n",
|
32 |
+
"model = Model.from_pretrained(\"results/9500\", num_labels=6)\n",
|
33 |
+
"\n",
|
34 |
+
"\n",
|
35 |
+
"classifier = pipeline('text-classification', model=model, tokenizer=tokenizer, top_k=None)"
|
36 |
+
]
|
37 |
+
},
|
38 |
+
{
|
39 |
+
"cell_type": "code",
|
40 |
+
"execution_count": 21,
|
41 |
+
"id": "fa1ac990",
|
42 |
+
"metadata": {},
|
43 |
+
"outputs": [
|
44 |
+
{
|
45 |
+
"data": {
|
46 |
+
"text/plain": [
|
47 |
+
"[[{'label': 'LABEL_0', 'score': 0.6298099160194397},\n",
|
48 |
+
" {'label': 'LABEL_2', 'score': 0.11560700088739395},\n",
|
49 |
+
" {'label': 'LABEL_4', 'score': 0.10653004795312881},\n",
|
50 |
+
" {'label': 'LABEL_5', 'score': 0.059700410813093185},\n",
|
51 |
+
" {'label': 'LABEL_1', 'score': 0.04481027275323868},\n",
|
52 |
+
" {'label': 'LABEL_3', 'score': 0.04354238137602806}]]"
|
53 |
+
]
|
54 |
+
},
|
55 |
+
"execution_count": 21,
|
56 |
+
"metadata": {},
|
57 |
+
"output_type": "execute_result"
|
58 |
+
}
|
59 |
+
],
|
60 |
+
"source": [
|
61 |
+
"classifier(\"i love you\")"
|
62 |
+
]
|
63 |
+
},
|
64 |
+
{
|
65 |
+
"cell_type": "code",
|
66 |
+
"execution_count": 6,
|
67 |
+
"id": "fd9fc0a2",
|
68 |
+
"metadata": {},
|
69 |
+
"outputs": [],
|
70 |
+
"source": [
|
71 |
+
"tokens = tokenizer(\"hi nice to meet you\")"
|
72 |
+
]
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"cell_type": "code",
|
76 |
+
"execution_count": 11,
|
77 |
+
"id": "68a7f0da",
|
78 |
+
"metadata": {},
|
79 |
+
"outputs": [
|
80 |
+
{
|
81 |
+
"ename": "AttributeError",
|
82 |
+
"evalue": "'str' object has no attribute 'size'",
|
83 |
+
"output_type": "error",
|
84 |
+
"traceback": [
|
85 |
+
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
86 |
+
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
|
87 |
+
"\u001b[0;32m/tmp/ipykernel_40959/3404384710.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mmodel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0mtokens\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
|
88 |
+
"\u001b[0;32m~/.local/lib/python3.10/site-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1499\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0m_global_backward_pre_hooks\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0m_global_backward_hooks\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1500\u001b[0m or _global_forward_hooks or _global_forward_pre_hooks):\n\u001b[0;32m-> 1501\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mforward_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1502\u001b[0m \u001b[0;31m# Do not call functions when jit is used\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1503\u001b[0m \u001b[0mfull_backward_hooks\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnon_full_backward_hooks\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
89 |
+
"\u001b[0;32m~/.local/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, input_ids, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, labels, output_attentions, output_hidden_states, return_dict)\u001b[0m\n\u001b[1;32m 1560\u001b[0m \u001b[0mreturn_dict\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mreturn_dict\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mreturn_dict\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconfig\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0muse_return_dict\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1561\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1562\u001b[0;31m outputs = self.bert(\n\u001b[0m\u001b[1;32m 1563\u001b[0m \u001b[0minput_ids\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1564\u001b[0m \u001b[0mattention_mask\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mattention_mask\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
90 |
+
"\u001b[0;32m~/.local/lib/python3.10/site-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1499\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0m_global_backward_pre_hooks\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0m_global_backward_hooks\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1500\u001b[0m or _global_forward_hooks or _global_forward_pre_hooks):\n\u001b[0;32m-> 1501\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mforward_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1502\u001b[0m \u001b[0;31m# Do not call functions when jit is used\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1503\u001b[0m \u001b[0mfull_backward_hooks\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnon_full_backward_hooks\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
91 |
+
"\u001b[0;32m~/.local/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, input_ids, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, encoder_hidden_states, encoder_attention_mask, past_key_values, use_cache, output_attentions, output_hidden_states, return_dict)\u001b[0m\n\u001b[1;32m 966\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"You cannot specify both input_ids and inputs_embeds at the same time\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 967\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0minput_ids\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 968\u001b[0;31m \u001b[0minput_shape\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0minput_ids\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 969\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0minputs_embeds\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 970\u001b[0m \u001b[0minput_shape\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0minputs_embeds\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
92 |
+
"\u001b[0;31mAttributeError\u001b[0m: 'str' object has no attribute 'size'"
|
93 |
+
]
|
94 |
+
}
|
95 |
+
],
|
96 |
+
"source": [
|
97 |
+
"model(*tokens)"
|
98 |
+
]
|
99 |
+
}
|
100 |
+
],
|
101 |
+
"metadata": {
|
102 |
+
"kernelspec": {
|
103 |
+
"display_name": "Python 3 (ipykernel)",
|
104 |
+
"language": "python",
|
105 |
+
"name": "python3"
|
106 |
+
},
|
107 |
+
"language_info": {
|
108 |
+
"codemirror_mode": {
|
109 |
+
"name": "ipython",
|
110 |
+
"version": 3
|
111 |
+
},
|
112 |
+
"file_extension": ".py",
|
113 |
+
"mimetype": "text/x-python",
|
114 |
+
"name": "python",
|
115 |
+
"nbconvert_exporter": "python",
|
116 |
+
"pygments_lexer": "ipython3",
|
117 |
+
"version": "3.10.6"
|
118 |
+
}
|
119 |
+
},
|
120 |
+
"nbformat": 4,
|
121 |
+
"nbformat_minor": 5
|
122 |
+
}
|
app.py
CHANGED
@@ -13,7 +13,10 @@ def analyze(input, model):
|
|
13 |
|
14 |
# load my fine-tuned model
|
15 |
fine_tuned = "jbraha/tweet-bert"
|
|
|
|
|
16 |
|
|
|
17 |
|
18 |
#text insert
|
19 |
input = st.text_area("insert text to be analyzed", value="Nice to see you today.",
|
@@ -39,7 +42,15 @@ else:
|
|
39 |
|
40 |
|
41 |
if st.button('Analyze'):
|
42 |
-
|
|
|
|
|
|
|
|
|
|
|
43 |
else:
|
44 |
st.write('Excited to analyze!')
|
45 |
|
|
|
|
|
|
|
|
13 |
|
14 |
# load my fine-tuned model
|
15 |
fine_tuned = "jbraha/tweet-bert"
|
16 |
+
labels = {'LABEL_0': 'toxic', 'LABEL_1': 'severe_toxic', 'LABEL_2': 'obscene', 'LABEL_3': 'threat',
|
17 |
+
'LABEL_4': 'insult', 'LABEL_5': 'identity_hate'}
|
18 |
|
19 |
+
# `labels` (defined above) maps the model's raw class ids (e.g. "LABEL_0") to readable names (e.g. "toxic")
|
20 |
|
21 |
#text insert
|
22 |
input = st.text_area("insert text to be analyzed", value="Nice to see you today.",
|
|
|
42 |
|
43 |
|
44 |
if st.button('Analyze'):
|
45 |
+
result = classifier(input)
|
46 |
+
if option == 'Fine-tuned':
|
47 |
+
output = {'Toxic': result['LABEL_0']}
|
48 |
+
del result['LABEL_0']
|
49 |
+
output[max(result, key=result.get)] = result[max(result, key=result.get)]
|
50 |
+
st.write(output)
|
51 |
else:
|
52 |
st.write('Excited to analyze!')
|
53 |
|
54 |
+
|
55 |
+
|
56 |
+
|