# Page header captured together with this file (not part of the program):
#   Kartik17's picture
#   Update app.py
#   12e887d verified
import gradio as gr
from transformers import pipeline
import tensorflow as tf
import pandas as pd
from tensorflow.keras.layers import TextVectorization
# ---------------------------------------------------------------------------
# One-time start-up work: rebuild the text vectorizer from the training
# corpus and load the two models used by the functions defined further down.
# ---------------------------------------------------------------------------

# Training data. It is read here to fit the vectorizer vocabulary and, later,
# to recover the label column names. Adjust the path if the CSV lives
# elsewhere.
new_data = pd.read_csv('train.csv')
x = new_data['comment_text']
# Every column from index 2 onwards is taken as a label column.
# NOTE(review): `y` is not read anywhere in the visible code; kept so the
# module-level name stays available.
y = new_data[new_data.columns[2:]].values

# Upper bound on the vocabulary size handed to TextVectorization.
max_features = 160000

vectorizer = TextVectorization(
    max_tokens=max_features,
    output_sequence_length=1800,
    output_mode='int',
)
# Fit the vocabulary on the raw comment strings. A bare
# `vectorizer.get_vocabulary()` call used to sit before this line; its result
# was discarded and it ran before any vocabulary had been learned, so it has
# been dropped.
# NOTE(review): presumably these settings must match the ones used when
# 'finalprojecttoxic.h5' was trained -- confirm.
vectorizer.adapt(x.values)

# Pre-trained Keras classifier loaded from an HDF5 file.
model = tf.keras.models.load_model('finalprojecttoxic.h5')

# Translation pipeline using the Helsinki-NLP/opus-mt-hi-en checkpoint for
# both the model and the tokenizer.
translator_hindi = pipeline(
    "translation",
    model="Helsinki-NLP/opus-mt-hi-en",
    tokenizer="Helsinki-NLP/opus-mt-hi-en",
)
def translate_hindi(from_text):
    """Translate text with the module-level ``translator_hindi`` pipeline.

    Args:
        from_text: Text to translate. ``None``, an empty string, or a
            whitespace-only string is treated as "nothing to translate".

    Returns:
        The ``'translation_text'`` field of the first pipeline result, or
        ``''`` when there is nothing to translate.
    """
    # A blank submission carries no content; skip the model call entirely
    # instead of asking the translator to generate text from nothing.
    # Non-string inputs (e.g. a list of strings) are passed through untouched.
    if from_text is None or (isinstance(from_text, str) and not from_text.strip()):
        return ''
    result2 = translator_hindi(from_text)
    return result2[0]['translation_text']
def score_comment(comment):
    """Run the classifier on one comment and format the per-label verdicts.

    Args:
        comment: The comment text to classify.

    Returns:
        A string with one ``"<label>: <flag>\\n"`` line per label column
        (columns of ``new_data`` from index 2 onwards), where ``<flag>`` is
        the result of comparing that label's prediction with 0.5.
    """
    # The vectorizer works on batches, so wrap the single comment in a list.
    batch = vectorizer([comment])
    predictions = model.predict(batch)
    label_names = new_data.columns[2:]
    # Only one comment was submitted, so row 0 holds its predictions.
    lines = [
        '{}: {}\n'.format(label, predictions[0][position] > 0.5)
        for position, label in enumerate(label_names)
    ]
    return ''.join(lines)
def combined_models(input):
    """Translate a comment and score the translation for toxicity.

    Args:
        input: The raw comment text. (The name shadows the ``input`` builtin;
            it is kept unchanged so existing callers are unaffected.)

    Returns:
        A ``(translated_text, toxic_score)`` tuple: the output of
        ``translate_hindi`` and the formatted report from ``score_comment``.
    """
    translated_text = translate_hindi(input)
    # Score the translation rather than the raw input. Previously the raw
    # input was scored, so the translation never reached the classifier.
    # NOTE(review): assumes the classifier and the vocabulary fitted on
    # train.csv are English, matching the hi->en translator output -- confirm.
    toxic_score = score_comment(translated_text)
    return translated_text, toxic_score
# Web UI: one text input wired to combined_models, whose two return values
# are shown in two text outputs.
interface = gr.Interface(
    fn=combined_models,
    inputs="text",
    outputs=["text", "text"],
    title="Toxic Comment Analyzer",
)

# Start the app, passing share=True to launch().
interface.launch(share=True)