Kartik17 committed on
Commit 5c88e81
1 Parent(s): 7c271dd

Create app.py

Files changed (1)
  1. app.py +105 -0
app.py ADDED
@@ -0,0 +1,105 @@
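+ # Notebook-style app script: trains a toxic comment classifier (TextVectorization + Bidirectional LSTM),
+ # adds a Hindi-to-English translation pipeline, and serves both through a Gradio interface.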
+ import os
+ import pandas as pd
+ import tensorflow as tf
+ import numpy as np
+
+ data = pd.read_csv('train.csv')
+ data.head(5)
+
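+ # Vectorize the comment text into fixed-length integer sequences and build a batched tf.data pipeline.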
+ from tensorflow.keras.layers import TextVectorization
+
+ x = data['comment_text']
+ y = data[data.columns[2:]].values
+
+ max_features = 200000
+ vectorizer = TextVectorization(max_tokens=max_features,
+                                output_sequence_length=1800,
+                                output_mode='int')
+ vectorizer.adapt(x.values)
+ vectorizer.get_vocabulary()
+ vectorizer("have you watched breaking bad")[:5]
+ vectorized_text = vectorizer(x.values)
+ dataset = tf.data.Dataset.from_tensor_slices((vectorized_text, y))
+ dataset = dataset.cache()
+ dataset = dataset.shuffle(160000)
+ dataset = dataset.batch(16)
+ dataset = dataset.prefetch(8)
+
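+ # Peek at one batch, then split the pipeline into train/val/test (roughly 70/20/10).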
+ batch_x, batch_y = next(dataset.as_numpy_iterator())
+
+ train = dataset.take(int(len(dataset)*.7))
+ val = dataset.skip(int(len(dataset)*.7)).take(int(len(dataset)*.2))
+ test = dataset.skip(int(len(dataset)*.9)).take(int(len(dataset)*.1))
+
+ train_generator = train.as_numpy_iterator()
+ next(train_generator)
+
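+ # Model: Embedding -> Bidirectional LSTM -> dense layers -> 6 sigmoid outputs, one per label column.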
+ from tensorflow.keras.models import Sequential
+ from tensorflow.keras.layers import LSTM, Dropout, Bidirectional, Dense, Embedding
+
+ model = Sequential()
+ model.add(Embedding(max_features+1, 32))
+ model.add(Bidirectional(LSTM(32, activation='tanh')))
+ model.add(Dense(128, activation='relu'))
+ model.add(Dense(256, activation='relu'))
+ model.add(Dense(128, activation='relu'))
+ model.add(Dense(6, activation='sigmoid'))
+ model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
+
+ model.summary()
+
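+ # Train, evaluate on the held-out split, run a quick sanity-check prediction, then save the model.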
+ history = model.fit(train, epochs=10, validation_data=val)
+
+ model.evaluate(test)
+
+ x_batch, y_batch = next(test.as_numpy_iterator())
+ (model.predict(x_batch) > 0.5).astype(int)
+ input_text = vectorizer('I am coming to kill you pal')
+
+ input_text[:7]
+ batch = next(test.as_numpy_iterator())
+ res = model.predict(np.expand_dims(input_text, 0))
+ res
+ model.save('finalprojecttoxic.h5')
+
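+ # Reload the saved model and set up a Hindi-to-English translation pipeline for the app.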
+ from transformers import pipeline
+
+ import gradio as gr
+
+ model = tf.keras.models.load_model('finalprojecttoxic.h5')
+ input_str = vectorizer('Hey i freaking hate you!. I\'m going to hurt you!')
+ res = model.predict(np.expand_dims(input_str, 0))
+
+
+ translator_hindi = pipeline("translation", model="Helsinki-NLP/opus-mt-hi-en", tokenizer="Helsinki-NLP/opus-mt-hi-en")
+ hindi_text = "नमस्ते, आप कैसे हैं?"
+ hin_to_en = translator_hindi(hindi_text)
+ hin_to_en[0]['translation_text']
+
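+ # Translate Hindi text to English using the pipeline above.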
+ def translate_hindi(from_text):
+     result2 = translator_hindi(from_text)
+
+     return result2[0]['translation_text']
+
+
+ translate_hindi('नमस्ते, आप कैसे हैं?')
+
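+ # Score a comment against each label column; report which labels cross the 0.5 threshold.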
+ def score_comment(comment):
+     vectorized_comment = vectorizer([comment])
+     results = model.predict(vectorized_comment)
+
+     text = ''
+     for idx, col in enumerate(data.columns[2:]):
+         text += '{}: {}\n'.format(col, results[0][idx] > 0.5)
+
+     return text
+
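+ # Gradio app: return the Hindi-to-English translation of the input and the toxicity scores of the raw input.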
+ def combined_models(user_input):
+     output1 = translate_hindi(user_input)
+     output2 = score_comment(user_input)
+
+     return output1, output2
+
+ interface = gr.Interface(fn=combined_models, inputs="text", outputs=["text", "text"], title="Toxic Comment Analyzer")
+
+ interface.launch(share=True)
+