File size: 1,452 Bytes
f1d1893
 
 
 
 
 
 
12e887d
f1d1893
 
 
 
12e887d
f1d1893
12e887d
f1d1893
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import gradio as gr
from transformers import pipeline
import tensorflow as tf
import pandas as pd
from tensorflow.keras.layers import TextVectorization

# Load the training data. It is read at startup because the vectorizer's
# vocabulary is rebuilt from it below rather than being loaded from disk.
new_data = pd.read_csv('train.csv')  # Make sure to adjust the path if necessary

# Raw comment strings, plus the label matrix taken from every column from
# index 2 onward (the same column slice score_comment iterates over).
x = new_data['comment_text']
y = new_data[new_data.columns[2:]].values

# Cap on the vocabulary size handed to the vectorizer.
max_features = 160000
vectorizer = TextVectorization(
    max_tokens=max_features,
    output_sequence_length=1800,
    output_mode='int',
)
# Learn the vocabulary from the training comments.
# NOTE(review): these vectorizer settings presumably have to match the ones
# used when 'finalprojecttoxic.h5' was trained -- confirm.
vectorizer.adapt(x.values)

# Trained classifier used by score_comment.
model = tf.keras.models.load_model('finalprojecttoxic.h5')

# Hindi -> English translation pipeline used by translate_hindi.
translator_hindi = pipeline(
    "translation",
    model="Helsinki-NLP/opus-mt-hi-en",
    tokenizer="Helsinki-NLP/opus-mt-hi-en",
)

def translate_hindi(from_text):
    """Translate ``from_text`` with the module-level ``translator_hindi`` pipeline.

    Args:
        from_text: Text to pass to the translation pipeline.

    Returns:
        The ``'translation_text'`` field of the first item the pipeline
        returns.
    """
    outputs = translator_hindi(from_text)
    first_output = outputs[0]
    return first_output['translation_text']

def score_comment(comment):
    """Run the classifier on one comment and report each label as True/False.

    The comment is vectorized with the module-level ``vectorizer`` and passed
    to ``model.predict``. Each label column of ``new_data`` (columns from
    index 2 onward) is paired with the model output at the same position and
    reported as ``True`` when that output is greater than 0.5.

    Args:
        comment: The comment text to score.

    Returns:
        A string with one ``"<label>: <True|False>\\n"`` line per label column.
    """
    batch = vectorizer([comment])
    scores = model.predict(batch)

    label_names = new_data.columns[2:]
    return ''.join(
        '{}: {}\n'.format(label, scores[0][pos] > 0.5)
        for pos, label in enumerate(label_names)
    )

def combined_models(input):
    """Translate the input text, then score the translation with the classifier.

    Args:
        input: Text entered by the user. The name shadows the builtin but is
            kept so existing callers keep working.

    Returns:
        A ``(translated_text, toxic_score)`` tuple: the output of
        ``translate_hindi`` and the per-label report from ``score_comment``.
    """
    translated_text = translate_hindi(input)
    # Score the translation, not the raw input: the translation was previously
    # computed but never fed to the classifier, so the two models were not
    # actually chained.
    # NOTE(review): assumes the classifier expects text in the translator's
    # output language -- confirm against the training data.
    toxic_score = score_comment(translated_text)

    return translated_text, toxic_score

# Gradio UI: one text input, two text outputs (one per element of the tuple
# returned by combined_models).
interface = gr.Interface(
    fn=combined_models,
    inputs="text",
    outputs=["text", "text"],
    title="Toxic Comment Analyzer",
)
# Start the app, requesting a shareable link.
interface.launch(share=True)