Spaces:

AhmedSSabir
/

Demo-for-Gender-Score

Sleeping

App Files Files Community

Demo-for-Gender-Score / app.py

AhmedSSabir

Update app.py

0d826b8 verified 7 months ago

raw

history blame

4.27 kB

	#!/usr/bin/env python3
	from doctest import OutputChecker
	import sys
	import torch
	import re
	import os
	import gradio as gr
	import requests
	import torch
	from transformers import GPT2Tokenizer, GPT2LMHeadModel
	from torch.nn.functional import softmax
	import numpy as np

	# just for the sake of this demo, we use cloze prob to initialize the hypothesis

	#url = "https://github.com/simonepri/lm-scorer/tree/master/lm_scorer/models"
	#resp = requests.get(url)

	from sentence_transformers import SentenceTransformer, util

	# Sentence-embedding model; Visual_re_ranker uses it to encode the captions
	# and the context label before comparing them with cosine similarity.
	model_sts = SentenceTransformer('stsb-distilbert-base')
	#model_sts = SentenceTransformer('roberta-large-nli-stsb-mean-tokens')
	#batch_size = 1
	#scorer = LMScorer.from_pretrained('gpt2' , device=device, batch_size=batch_size)

	#import torch
	from transformers import GPT2Tokenizer, GPT2LMHeadModel
	import numpy as np
	import re



	def get_sim(x):
	    """Return the text form of ``x`` with two characters trimmed from each end.

	    ``x`` is converted with ``str`` and the two outermost characters on both
	    sides are dropped, so a doubly-bracketed value such as ``[[0.5]]``
	    becomes the string ``"0.5"``.  A text form shorter than five characters
	    yields an empty string.

	    Returns:
	        str: the trimmed text; callers convert it with ``float`` themselves.
	    """
	    # Equivalent to applying ``[1:-1]`` twice.
	    text = str(x)
	    return text[2:-2]


	# Load pre-trained model

	#model = GPT2LMHeadModel.from_pretrained('distilgpt2', output_hidden_states = True, output_attentions = True)
	#model = GPT2LMHeadModel.from_pretrained('gpt2', output_hidden_states = True, output_attentions = True)
	#model = gr.Interface.load('huggingface/distilgpt2', output_hidden_states = True, output_attentions = True)

	#model.eval()
	#tokenizer = gr.Interface.load('huggingface/distilgpt2')

	#tokenizer = GPT2Tokenizer.from_pretrained('distilgpt2')
	#tokenizer = GPT2Tokenizer.from_pretrained('distilgpt2')
	#tokenizer = GPT2Tokenizer.from_pretrained('distilgpt2')



	# GPT-2 tokenizer and language model; sentence_prob_mean reads both as
	# module-level globals to score a caption.
	tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
	model = GPT2LMHeadModel.from_pretrained('gpt2')



	def sentence_prob_mean(text):
	    """Return the mean probability the language model assigns to ``text``'s tokens.

	    The text is tokenized with the module-level ``tokenizer`` and run through
	    the module-level ``model``.  Each token after the first is scored with the
	    probability predicted for it at the preceding position, and those
	    probabilities are averaged.

	    Args:
	        text: the sentence to score.

	    Returns:
	        float: the arithmetic mean of the per-token probabilities.

	    NOTE(review): a text that tokenizes to a single token leaves no predicted
	    positions, so the mean is taken over an empty tensor (presumably NaN) --
	    confirm whether callers need a guard.
	    """
	    input_ids = tokenizer.encode(text, return_tensors='pt')

	    # Only the logits are needed, so no labels are passed: that would make the
	    # model compute a loss that is never read.
	    with torch.no_grad():
	        logits = model(input_ids).logits

	    # Align predictions with targets: position i predicts token i + 1, so drop
	    # the last prediction and the first token.
	    shift_logits = logits[..., :-1, :].contiguous()
	    shift_labels = input_ids[..., 1:].contiguous()

	    probs = softmax(shift_logits, dim=-1)

	    # Pick, at every position, the probability of the token that actually follows.
	    token_probs = torch.gather(probs, 2, shift_labels.unsqueeze(-1)).squeeze(-1)

	    return torch.mean(token_probs).item()


	def cos_sim(a, b):
	    """Return the cosine similarity of ``a`` and ``b``.

	    Computed as the inner product of the two inputs divided by the product of
	    their norms (``np.linalg.norm``).  No special handling is done for an
	    input whose norm is zero, which makes the denominator zero.
	    """
	    inner = np.inner(a, b)
	    norm_a = np.linalg.norm(a)
	    norm_b = np.linalg.norm(b)
	    return inner / (norm_a * norm_b)




	def _revised_score(lm_prob, similarity, context_prob):
	    """Combine LM probability, similarity and context confidence into one score.

	    Computes ``lm_prob ** (((1 - sim) / (1 + sim)) ** (1 - context_prob))``.
	    All three arguments are converted with ``float``.  A similarity of
	    exactly -1 still divides by zero.
	    """
	    # Cosine similarity can come out marginally above 1.0 through
	    # floating-point rounding.  That would make the ratio below negative, and
	    # a negative base raised to a fractional power is complex in Python, which
	    # the caller cannot turn back into a float.  Clamp to the limit value.
	    sim = min(float(similarity), 1.0)
	    exponent = pow((1 - sim) / (1 + sim), 1 - float(context_prob))
	    return pow(float(lm_prob), exponent)


	def _score_caption(caption, context_label_emb, context_prob):
	    """Score one caption against the already-encoded context label."""
	    caption_emb = model_sts.encode(caption, convert_to_tensor=True)
	    sim = util.pytorch_cos_sim(caption_emb, context_label_emb)
	    # get_sim strips the surrounding brackets from the array's text form;
	    # assumes the similarity array prints as "[[value]]" -- TODO confirm.
	    sim = get_sim(sim.cpu().numpy())
	    lm_prob = sentence_prob_mean(caption)
	    return _revised_score(lm_prob, sim, context_prob)


	def Visual_re_ranker(caption_man, caption_woman, context_label, context_prob):
	    """Score a "man" caption and a "woman" caption against one visual context.

	    Each caption is scored from its language-model probability
	    (``sentence_prob_mean``), its cosine similarity to ``context_label``
	    under ``model_sts``, and ``context_prob``.

	    Args:
	        caption_man: caption text for the "Man" entry.
	        caption_woman: caption text for the "Woman" entry.
	        context_label: text of the context label both captions are compared to.
	        context_prob: value convertible with ``float`` (the demo passes the
	            text-box string); used as ``1 - context_prob`` in the exponent.

	    Returns:
	        dict: ``{"Man": float, "Woman": float}``.
	    """
	    # The context label is shared, so encode it once for both captions.
	    context_label_emb = model_sts.encode(context_label, convert_to_tensor=True)
	    return {
	        "Man": _score_caption(caption_man, context_label_emb, context_prob),
	        "Woman": _score_caption(caption_woman, context_label_emb, context_prob),
	    }






	# Build the Gradio UI: four text boxes feed Visual_re_ranker positionally and
	# the returned dict is shown through the "label" output component.
	demo = gr.Interface(
	    fn=Visual_re_ranker,
	    description="Demo for Women Wearing Lipstick: Measuring the Bias Between Object and Its Related Gender (distilbert)",
	    inputs=[
	        gr.Textbox(value="a man riding a motorcycle on a road"),    # caption_man
	        gr.Textbox(value="a woman riding a motorcycle on a road"),  # caption_woman
	        gr.Textbox(value="motor scooter"),                          # context_label
	        gr.Textbox(value="0.2183"),                                 # context_prob
	    ],
	    outputs="label",
	)

	# Start serving the interface.
	demo.launch()