Spaces:
Runtime error
Runtime error
saurabhg2083
commited on
Commit
•
44663cf
1
Parent(s):
2108543
app.py
Browse files
app.py
ADDED
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import numpy as np
|
4 |
+
import re
|
5 |
+
import string
|
6 |
+
import textwrap
|
7 |
+
from transformers import BertTokenizer, BertForSequenceClassification, AutoModelForCausalLM, AutoTokenizer, pipeline, AdamW
|
8 |
+
from happytransformer import HappyTextToText, TTSettings
|
9 |
+
import torch
|
10 |
+
from torch.nn import BCEWithLogitsLoss
|
11 |
+
from torch.utils.data import DataLoader, TensorDataset, random_split
|
12 |
+
from happytransformer import HappyTextToText, TTSettings
|
13 |
+
|
14 |
+
|
15 |
+
# Hugging Face Hub repository of the fine-tuned bias classifier.
# NOTE(review): `save_directory` was referenced below but never defined, which
# raised NameError at import time.  It is assumed to be the same checkpoint the
# text-classification pipeline loads -- confirm that this repository also
# ships the tokenizer files.
save_directory = "saurabhg2083/model_bert"

# Ready-made classification pipeline for the same checkpoint.
pipe = pipeline("text-classification", model=save_directory)

# T5 grammar-correction model, used to smooth the text after term replacement.
happy_tt = HappyTextToText("T5", "vennify/t5-base-grammar-correction")
args = TTSettings(num_beams=5, min_length=1)

# Raw model/tokenizer pair used by model_eval().
model = BertForSequenceClassification.from_pretrained(save_directory)
tokenizer = BertTokenizer.from_pretrained(save_directory)
|
20 |
+
|
21 |
+
|
22 |
+
# Terms that replace_gendered_pronouns() searches for in the input text.
# Despite the name, most entries are not pronouns: the list mixes pronouns
# ('he', 'she', 'her', ...) with adjectives, nouns and multi-word phrases.
# 'polite' and 'dominant' each appear twice; dict(zip(...)) in
# replace_gendered_pronouns() keeps a single key for each of them.
# NOTE(review): 'shup-to-date' looks like a typo -- confirm the intended term.
gendered_pronouns = [
    'ambition', 'driven', 'lead', 'persist', 'principle', 'decision', 'superior', 'individual', 'assertive',
    'strong', 'hierarchical', 'rigid', 'silicon valley', 'stock options', 'takes risk', 'workforce', 'autonomous',
    'ping pong', 'pool table', 'must', 'competitive', 'he', 'his', 'himself', 'confident', 'active', 'aggressive',
    'ambitious', 'fearless', 'headstrong', 'defensive', 'independent', 'dominant', 'outspoken', 'leader', 'fast paced',
    'adventurous', 'analytical', 'decisive', 'determined', 'ninja', 'objective', 'rock star', 'boast', 'challenging', 'courage',
    'thoughtful', 'creative', 'adaptable', 'choose', 'curious', 'excellent', 'flexible', 'multitasking', 'health',
    'imaginative', 'intuitive', 'leans in', 'plans for the future', 'resilient', 'self-aware', 'socially responsible',
    'trustworthy', 'shup-to-date', 'wellness program', 'nurture', 'teach', 'dependable', 'community', 'serving', 'loyal',
    'enthusiasm', 'interpersonal', 'connect', 'commit', 'she', 'agree', 'empathy', 'sensitive', 'affectionate', 'feel',
    'support', 'collaborate', 'honest', 'trust', 'understand', 'compassion', 'share', 'polite', 'kind', 'caring', 'her',
    'hers', 'herself', 'feminine', 'cheer', 'communal', 'emotional', 'flatterable', 'gentle', 'interdependent', 'kinship',
    'modesty', 'pleasant', 'polite', 'quiet', 'sympathy', 'warm', 'dominant', 'yield',
    'native english speaker', 'professionally groomed hair', 'native', 'culture fit', 'non-white', 'clean-shaven',
    'neat hairstyle', 'master', 'slave', 'a cakewalk', 'brownbag session', 'spirit animal', 'digital native',
    'servant leadership', 'tribe', 'oriental', 'spic', 'english fluency', 'level native', 'illegals', 'eskimo',
    'latino', 'latina', 'migrant', 'blacklist', 'whitelist'
]
|
40 |
+
|
41 |
+
# Replacement terms, paired position by position with `gendered_pronouns`
# via zip() inside replace_gendered_pronouns().
# NOTE(review): this list appears to contain exactly the same entries, in the
# same order, as `gendered_pronouns`, so every term is mapped to itself and the
# replacement step only lower-cases the matches.  Presumably the neutral
# alternatives were meant to go here -- confirm and fill in.
neutral_words = [
    'ambition', 'driven', 'lead', 'persist', 'principle', 'decision', 'superior', 'individual', 'assertive', 'strong',
    'hierarchical', 'rigid', 'silicon valley', 'stock options', 'takes risk', 'workforce', 'autonomous', 'ping pong',
    'pool table', 'must', 'competitive', 'he', 'his', 'himself', 'confident', 'active', 'aggressive', 'ambitious',
    'fearless', 'headstrong', 'defensive', 'independent', 'dominant', 'outspoken', 'leader', 'fast paced', 'adventurous',
    'analytical', 'decisive', 'determined', 'ninja', 'objective', 'rock star', 'boast', 'challenging', 'courage',
    'thoughtful', 'creative', 'adaptable', 'choose', 'curious', 'excellent', 'flexible', 'multitasking', 'health',
    'imaginative', 'intuitive', 'leans in', 'plans for the future', 'resilient', 'self-aware', 'socially responsible',
    'trustworthy', 'shup-to-date', 'wellness program', 'nurture', 'teach', 'dependable', 'community', 'serving', 'loyal',
    'enthusiasm', 'interpersonal', 'connect', 'commit', 'she', 'agree', 'empathy', 'sensitive', 'affectionate', 'feel',
    'support', 'collaborate', 'honest', 'trust', 'understand', 'compassion', 'share', 'polite', 'kind', 'caring', 'her',
    'hers', 'herself', 'feminine', 'cheer', 'communal', 'emotional', 'flatterable', 'gentle', 'interdependent', 'kinship',
    'modesty', 'pleasant', 'polite', 'quiet', 'sympathy', 'warm', 'dominant', 'yield',
    'native english speaker', 'professionally groomed hair', 'native', 'culture fit', 'non-white', 'clean-shaven',
    'neat hairstyle', 'master', 'slave', 'a cakewalk', 'brownbag session', 'spirit animal', 'digital native',
    'servant leadership', 'tribe', 'oriental', 'spic', 'english fluency', 'level native', 'illegals', 'eskimo', 'latino',
    'latina', 'migrant', 'blacklist', 'whitelist'
]
|
60 |
+
|
61 |
+
|
62 |
+
|
63 |
+
def replace_gendered_pronouns(text, word_map=None):
    """Replace flagged terms in *text* with their configured replacements.

    Matching is case-insensitive and restricted to whole words/phrases.  All
    terms are applied in a single pass, longest term first, so that

    * a multi-word phrase ('digital native') is not pre-empted by a shorter
      term it contains ('native'), and
    * text produced by one replacement is never rewritten by another term.

    Replacement strings are inserted literally (backslashes are not
    interpreted as regex template escapes).

    Args:
        text: The text to rewrite.
        word_map: Optional mapping of term -> replacement.  When omitted, the
            mapping is built by pairing the module-level ``gendered_pronouns``
            and ``neutral_words`` lists position by position.

    Returns:
        The rewritten text.  Empty input, or an empty mapping, is returned
        unchanged.
    """
    if word_map is None:
        word_map = dict(zip(gendered_pronouns, neutral_words))
    if not text or not word_map:
        return text

    # Lower-cased keys let a match found under IGNORECASE be looked up again;
    # empty terms are dropped because they would match at every word boundary.
    lookup = {
        term.lower(): replacement
        for term, replacement in word_map.items()
        if term
    }
    if not lookup:
        return text

    # Longest alternative first: the regex engine takes the first alternative
    # that matches at a position, so phrases must precede their prefixes.
    ordered_terms = sorted(lookup, key=len, reverse=True)
    pattern = re.compile(
        r'\b(?:' + '|'.join(re.escape(term) for term in ordered_terms) + r')\b',
        flags=re.IGNORECASE,
    )

    def _substitute(match):
        found = match.group(0)
        # Fall back to the matched text if case folding yields an unknown key.
        return lookup.get(found.lower(), found)

    return pattern.sub(_substitute, text)
|
74 |
+
|
75 |
+
def model_eval(text):
    """Classify *text* with the module-level BERT model.

    The text is tokenized (padded/truncated to 512 tokens), run through the
    model without gradient tracking, and the logit is thresholded at zero.

    Args:
        text: The text to classify.

    Returns:
        1 if the logit is greater than zero, otherwise 0.

    NOTE(review): ``.item()`` only works on a one-element tensor, so this
    presumably expects the checkpoint to emit a single logit per input --
    confirm against the model's ``num_labels``.
    """
    # Inference only: switch off dropout and similar training-time layers.
    model.eval()

    encoded = tokenizer(
        text,
        padding='max_length',
        truncation=True,
        max_length=512,
        return_tensors="pt",
    )

    with torch.no_grad():
        logits = model(**encoded).logits

    return (logits > 0).int().item()
|
93 |
+
|
94 |
+
|
95 |
+
# ---- Streamlit UI -----------------------------------------------------------
st.title("Job Bias Testing")

text1 = st.text_area("Enter Text 1")

if st.button("Calculate Similarity"):
    # Treat whitespace-only input the same as no input at all.
    if text1 and text1.strip():
        predicted_label = model_eval(text1)
        # Convert the 0/1 model output into a readable label.
        label_mapping = {0: "Negative", 1: "Positive"}
        predicted_label_text = label_mapping[predicted_label]

        if predicted_label_text == "Positive":
            rewritten_sentence = replace_gendered_pronouns(text1)
            # The grammar-correction model expects the "grammar: " task prefix.
            result = happy_tt.generate_text("grammar: " + rewritten_sentence, args=args)
            # Fixed: `{result.text}` used to sit outside the f-string, which
            # was a SyntaxError and kept the whole script from loading.
            st.success(
                f"Predicted Label: {predicted_label_text} and new Text is: {result.text}"
            )
        else:
            # Previously a "Negative" prediction produced no output at all.
            st.success(f"Predicted Label: {predicted_label_text}")
    else:
        st.warning("Please enter text Job Description.")
|