"""Gradio app: translate a Hindi comment to English and report toxicity labels.

At import time this script:

* reads ``train.csv`` and fits a ``TextVectorization`` layer on its
  ``comment_text`` column,
* loads the Keras classifier stored in ``finalprojecttoxic.h5``,
* builds a Hindi -> English translation pipeline, and
* launches a Gradio interface exposing ``combined_models``.
"""

import gradio as gr
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import TextVectorization
from transformers import pipeline

# Load your data
new_data = pd.read_csv('train.csv')  # Make sure to adjust the path if necessary

# Every column from the third one onward is treated as a label column; the
# same columns name the entries of the report built by ``score_comment``.
LABEL_COLUMNS = new_data.columns[2:]

x = new_data['comment_text']
y = new_data[LABEL_COLUMNS].values

max_features = 160000

# NOTE(review): these settings presumably have to match the vectorizer used
# when finalprojecttoxic.h5 was trained -- confirm before changing them.
vectorizer = TextVectorization(
    max_tokens=max_features,
    output_sequence_length=1800,
    output_mode='int',
)
# Build the vocabulary from the training comments.
vectorizer.adapt(x.values)

model = tf.keras.models.load_model('finalprojecttoxic.h5')

translator_hindi = pipeline(
    "translation",
    model="Helsinki-NLP/opus-mt-hi-en",
    tokenizer="Helsinki-NLP/opus-mt-hi-en",
)


def translate_hindi(from_text):
    """Return the English translation of ``from_text``.

    Args:
        from_text: Text to pass to the Hindi -> English translation pipeline.

    Returns:
        The ``translation_text`` field of the pipeline's first result.
    """
    result2 = translator_hindi(from_text)
    return result2[0]['translation_text']


def score_comment(comment):
    """Classify ``comment`` and format one ``label: bool`` line per label.

    Args:
        comment: The text to vectorize and feed to the classifier.

    Returns:
        A string with one ``"<column>: <True|False>\\n"`` line for each
        column in ``LABEL_COLUMNS``; a label is ``True`` when the model's
        output for it is strictly greater than 0.5.
    """
    vectorized_comment = vectorizer([comment])
    results = model.predict(vectorized_comment)
    # Only one comment is submitted, so only the first row is relevant.
    scores = results[0]
    return ''.join(
        '{}: {}\n'.format(col, scores[idx] > 0.5)
        for idx, col in enumerate(LABEL_COLUMNS)
    )


def combined_models(input):
    """Translate the submitted comment and score the translation.

    Args:
        input: The comment typed into the Gradio text box. The name shadows
            the builtin but is kept so existing callers are unaffected.

    Returns:
        A ``(translated_text, toxic_score)`` tuple: the English translation
        and the label report produced by ``score_comment``.
    """
    translated_text = translate_hindi(input)
    # Score the translation, not the raw input: the vectorizer's vocabulary
    # comes from train.csv, so untranslated text would bypass the translation
    # step entirely. NOTE(review): assumes train.csv comments are English.
    toxic_score = score_comment(translated_text)
    return translated_text, toxic_score


interface = gr.Interface(
    fn=combined_models,
    inputs="text",
    outputs=["text", "text"],
    title="Toxic Comment Analyzer",
)
interface.launch(share=True)