# Spaces:
# Runtime error
# Runtime error
import re
import string
import textwrap

import numpy as np
import pandas as pd
import streamlit as st
import torch
from happytransformer import HappyTextToText, TTSettings
from torch.nn import BCEWithLogitsLoss
from torch.utils.data import DataLoader, TensorDataset, random_split
from transformers import (
    AdamW,
    AutoModelForCausalLM,
    AutoTokenizer,
    BertForSequenceClassification,
    BertTokenizer,
    pipeline,
)
# Checkpoint loaded by the explicit model/tokenizer below.
# NOTE(review): `save_directory` was referenced but never defined in this
# file, which raises NameError as soon as the script starts. It is assumed
# here to be the same Hub checkpoint the pipeline loads -- confirm.
save_directory = "saurabhg2083/model_bert"

# Ready-made text-classification pipeline for the same checkpoint.
pipe = pipeline("text-classification", model="saurabhg2083/model_bert")

# T5 grammar-correction model and the generation settings passed to it when
# the rewritten job description is cleaned up.
happy_tt = HappyTextToText("T5", "vennify/t5-base-grammar-correction")
args = TTSettings(num_beams=5, min_length=1)

# Classifier and tokenizer used by model_eval().
model = BertForSequenceClassification.from_pretrained(save_directory)
tokenizer = BertTokenizer.from_pretrained(save_directory)
# Terms that replace_gendered_pronouns() searches for (case-insensitively, as
# whole words). Despite the name, the list holds more than pronouns.
#
# The entries are paired BY POSITION with `neutral_words` via zip(), so do not
# reorder, insert or remove items here without making the same change there.
#
# NOTE(review): 'polite' and 'dominant' each appear twice; the later pair
# simply overwrites the earlier one when the mapping dict is built.
# NOTE(review): 'shup-to-date' looks like a typo -- confirm the intended term.
gendered_pronouns = [
    'ambition', 'driven', 'lead', 'persist', 'principle', 'decision', 'superior', 'individual', 'assertive',
    'strong', 'hierarchical', 'rigid', 'silicon valley', 'stock options', 'takes risk', 'workforce', 'autonomous',
    'ping pong', 'pool table', 'must', 'competitive', 'he', 'his', 'himself', 'confident', 'active', 'aggressive',
    'ambitious', 'fearless', 'headstrong', 'defensive', 'independent', 'dominant', 'outspoken', 'leader', 'fast paced',
    'adventurous', 'analytical', 'decisive', 'determined', 'ninja', 'objective', 'rock star', 'boast', 'challenging', 'courage',
    'thoughtful', 'creative', 'adaptable', 'choose', 'curious', 'excellent', 'flexible', 'multitasking', 'health',
    'imaginative', 'intuitive', 'leans in', 'plans for the future', 'resilient', 'self-aware', 'socially responsible',
    'trustworthy', 'shup-to-date', 'wellness program', 'nurture', 'teach', 'dependable', 'community', 'serving', 'loyal',
    'enthusiasm', 'interpersonal', 'connect', 'commit', 'she', 'agree', 'empathy', 'sensitive', 'affectionate', 'feel',
    'support', 'collaborate', 'honest', 'trust', 'understand', 'compassion', 'share', 'polite', 'kind', 'caring', 'her',
    'hers', 'herself', 'feminine', 'cheer', 'communal', 'emotional', 'flatterable', 'gentle', 'interdependent', 'kinship',
    'modesty', 'pleasant', 'polite', 'quiet', 'sympathy', 'warm', 'dominant', 'yield',
    'native english speaker', 'professionally groomed hair', 'native', 'culture fit', 'non-white', 'clean-shaven',
    'neat hairstyle', 'master', 'slave', 'a cakewalk', 'brownbag session', 'spirit animal', 'digital native',
    'servant leadership', 'tribe', 'oriental', 'spic', 'english fluency', 'level native', 'illegals', 'eskimo',
    'latino', 'latina', 'migrant', 'blacklist', 'whitelist',
]
# List of neutral words.
#
# Each entry is the replacement for the entry at the same index of
# `gendered_pronouns` (the two lists are combined with zip()), so the order
# and length here must stay in step with that list.
#
# NOTE(review): this list is currently identical, entry for entry, to
# `gendered_pronouns`, so every term maps to itself and the rewrite step
# changes no wording. Supplying real neutral alternatives is a content
# decision left to the maintainers.
neutral_words = [
    'ambition', 'driven', 'lead', 'persist', 'principle', 'decision', 'superior', 'individual', 'assertive', 'strong',
    'hierarchical', 'rigid', 'silicon valley', 'stock options', 'takes risk', 'workforce', 'autonomous', 'ping pong',
    'pool table', 'must', 'competitive', 'he', 'his', 'himself', 'confident', 'active', 'aggressive', 'ambitious',
    'fearless', 'headstrong', 'defensive', 'independent', 'dominant', 'outspoken', 'leader', 'fast paced', 'adventurous',
    'analytical', 'decisive', 'determined', 'ninja', 'objective', 'rock star', 'boast', 'challenging', 'courage',
    'thoughtful', 'creative', 'adaptable', 'choose', 'curious', 'excellent', 'flexible', 'multitasking', 'health',
    'imaginative', 'intuitive', 'leans in', 'plans for the future', 'resilient', 'self-aware', 'socially responsible',
    'trustworthy', 'shup-to-date', 'wellness program', 'nurture', 'teach', 'dependable', 'community', 'serving', 'loyal',
    'enthusiasm', 'interpersonal', 'connect', 'commit', 'she', 'agree', 'empathy', 'sensitive', 'affectionate', 'feel',
    'support', 'collaborate', 'honest', 'trust', 'understand', 'compassion', 'share', 'polite', 'kind', 'caring', 'her',
    'hers', 'herself', 'feminine', 'cheer', 'communal', 'emotional', 'flatterable', 'gentle', 'interdependent', 'kinship',
    'modesty', 'pleasant', 'polite', 'quiet', 'sympathy', 'warm', 'dominant', 'yield',
    'native english speaker', 'professionally groomed hair', 'native', 'culture fit', 'non-white', 'clean-shaven',
    'neat hairstyle', 'master', 'slave', 'a cakewalk', 'brownbag session', 'spirit animal', 'digital native',
    'servant leadership', 'tribe', 'oriental', 'spic', 'english fluency', 'level native', 'illegals', 'eskimo', 'latino',
    'latina', 'migrant', 'blacklist', 'whitelist',
]
def replace_gendered_pronouns(text, gendered=None, neutral=None):
    """Replace each listed term in *text* with its positional counterpart.

    Matching is case-insensitive and restricted to whole words/phrases. All
    terms are substituted in a single pass, so text produced by one
    replacement is never rewritten again by another term, and a longer phrase
    always wins over a shorter term that starts at the same position.

    Args:
        text: The text to rewrite.
        gendered: Terms to search for. Defaults to the module-level
            ``gendered_pronouns`` list.
        neutral: Replacements, paired with ``gendered`` by index. Defaults to
            the module-level ``neutral_words`` list.

    Returns:
        The rewritten text. Empty or ``None`` input is returned unchanged.
    """
    if not text:
        return text
    if gendered is None:
        gendered = gendered_pronouns
    if neutral is None:
        neutral = neutral_words

    # Keys are lower-cased so the case-insensitive match can be looked up
    # directly; empty terms are skipped because they would match everywhere.
    word_dict = {
        term.lower(): replacement
        for term, replacement in zip(gendered, neutral)
        if term
    }
    if not word_dict:
        return text

    # Longest alternatives first: regex alternation takes the first branch
    # that matches, so 'native english speaker' must precede 'native'.
    ordered_terms = sorted(word_dict, key=len, reverse=True)
    pattern = re.compile(
        r'\b(?:' + '|'.join(re.escape(term) for term in ordered_terms) + r')\b',
        flags=re.IGNORECASE,
    )

    def _swap(match):
        found = match.group(0)
        # Fall back to the matched text if case folding produced a spelling
        # that is not a key (possible with a few non-ASCII characters).
        replacement = word_dict.get(found.lower(), found)
        # Keep a leading capital so sentence starts and proper nouns are not
        # lower-cased by the rewrite.
        if found[:1].isupper() and replacement[:1].islower():
            replacement = replacement[:1].upper() + replacement[1:]
        return replacement

    # A callable replacement is inserted literally, so backslashes in a
    # replacement string are not interpreted as regex template escapes.
    return pattern.sub(_swap, text)
def model_eval(text):
    """Classify *text* with the module-level BERT model.

    Args:
        text: The raw text to classify.

    Returns:
        An ``int`` label. For a single-logit head this is 1 when the logit is
        positive and 0 otherwise; for a head with several logits it is the
        index of the largest logit.
    """
    # Evaluation mode (the standard switch before inference).
    model.eval()

    # Truncate/pad to a fixed 512-token window and return PyTorch tensors.
    inputs = tokenizer(text, padding='max_length', truncation=True, max_length=512, return_tensors="pt")

    # No gradients are needed for a prediction.
    with torch.no_grad():
        logits = model(**inputs).logits

    # `.item()` only works on a one-element tensor, so thresholding at 0 is
    # valid only for a single-logit head; a multi-logit head would raise here.
    # NOTE(review): the head size of the checkpoint is not visible in this
    # file -- confirm which branch applies.
    if logits.numel() == 1:
        return int((logits > 0).item())
    return int(logits.argmax(dim=-1).item())
# ---- Streamlit page -------------------------------------------------------
st.title("Job Bias Testing")
text1 = st.text_area("Enter Text 1")

if st.button("Calculate Similarity"):
    # Whitespace-only input is treated the same as no input.
    if text1 and text1.strip():
        predicted_label = model_eval(text1)

        # Convert the 0/1 label into a readable name.
        label_mapping = {0: "Negative", 1: "Positive"}
        predicted_label_text = label_mapping[predicted_label]

        if predicted_label_text == "Positive":
            rewritten_sentence = replace_gendered_pronouns(text1)
            # The grammar model expects the "grammar: " prefix on its input.
            result = happy_tt.generate_text("grammar: " + rewritten_sentence, args=args)
            st.success(f"Predicted Label: {predicted_label_text} and new Text is: {result.text}")
        else:
            # No rewrite is produced for this label, so only the label is
            # shown. NOTE(review): the original indentation was lost; confirm
            # this is the intended behaviour for the "Negative" case.
            st.success(f"Predicted Label: {predicted_label_text}")
    else:
        st.warning("Please enter text Job Description.")