Spaces:

soldni
/

viz_summaries

Sleeping

App Files Files Community

viz_summaries / app.py

soldni

more colors

9d0bbec about 2 years ago

raw

history blame contribute delete

4.57 kB

	from io import StringIO
	import itertools

	import gradio as gr
	import pandas as pd
	import spacy


	# Shared spaCy English pipeline, loaded once at import time; colorize() uses
	# it to obtain each token's lemma, part-of-speech tag and trailing whitespace.
	nlp = spacy.load('en_core_web_sm')

	# Inline HTML templates that wrap a token `t` in a translucent background.
	# In colorize(), RED/GRN are applied to generated-summary tokens and
	# YLW/BLU to gold-summary tokens; PLN leaves a token uncolored.
	HTML_RED = '<span style="background-color: rgba(255, 0, 0, 0.2)">{t}</span>'
	HTML_GRN = '<span style="background-color: rgba(0, 255, 0, 0.3)">{t}</span>'
	HTML_YLW = '<span style="background-color: rgba(255, 255, 0, 0.3)">{t}</span>'
	HTML_BLU = '<span style="background-color: rgba(0, 0, 255, 0.2)">{t}</span>'
	HTML_PLN = '<span>{t}</span>'
	# CSS handed to gr.Blocks in main(): pads table cells and draws collapsed
	# 1px black borders around the table and its cells.
	TABLE_CSS = '''
	th, td {
	padding: 4px;
	}
	table, th, td {
	border: 1px solid black;
	border-collapse: collapse;

	}
	'''


	def colorize(file_obj):
	    """Render an HTML table comparing gold and model summaries word by word.

	    The uploaded file is read as text, everything before the first
	    occurrence of ``example_id`` is discarded, and the remainder is parsed
	    as CSV. Each row must provide the columns ``example_id``,
	    ``target summary``, ``model summary A`` and ``model summary B``.

	    Only tokens tagged NOUN, PROPN or VERB are highlighted, and words are
	    matched by lower-cased lemma:

	    * gold tokens are blue when their lemma occurs in either model
	      summary, yellow otherwise;
	    * model tokens are green when their lemma occurs in the gold summary,
	      red otherwise.

	    Args:
	        file_obj: Object whose ``name`` attribute is the path of the
	            uploaded CSV file.

	    Returns:
	        A string holding an HTML ``<table>`` with one header row and one
	        row per CSV record (id, gold, model A, model B).

	    Raises:
	        ValueError: If the file does not contain ``example_id`` at all.
	    """
	    # Imported locally so this function brings its own dependency.
	    from html import escape

	    colored_pos = {'NOUN', 'PROPN', 'VERB'}

	    def render(doc, known_lemmas, hit_template, miss_template):
	        """Return `doc` as HTML, coloring content words by lemma membership."""
	        pieces = []
	        for token in doc:
	            if token.pos_ not in colored_pos:
	                template = HTML_PLN
	            elif token.lemma_.lower() in known_lemmas:
	                template = hit_template
	            else:
	                template = miss_template
	            # Token text comes from the uploaded file, so escape it before
	            # embedding it in markup.
	            pieces.append(
	                template.format(t=escape(token.text)) + token.whitespace_
	            )
	        return ''.join(pieces)

	    # Decode explicitly so the result does not depend on the host locale.
	    with open(file_obj.name, 'r', encoding='utf-8') as f:
	        raw = f.read()

	    # Skip any preamble before the header row. str.find returns -1 when the
	    # header is absent; slicing with that would keep only the last
	    # character and fail later with an unrelated parsing error.
	    header_start = raw.find('example_id')
	    if header_start < 0:
	        raise ValueError(
	            "Uploaded file does not contain an 'example_id' header column."
	        )
	    data = pd.read_csv(StringIO(raw[header_start:]))

	    table_content = []
	    for _, row in data.iterrows():
	        gold, genA, genB = nlp.pipe((
	            row['target summary'],
	            row['model summary A'],
	            row['model summary B'],
	        ))
	        gold_lemmas = {token.lemma_.lower() for token in gold}
	        generated_lemmas = {
	            token.lemma_.lower() for token in itertools.chain(genA, genB)
	        }
	        table_content.append([
	            escape(str(row['example_id'])),
	            render(gold, generated_lemmas, HTML_BLU, HTML_YLW),
	            render(genA, gold_lemmas, HTML_GRN, HTML_RED),
	            render(genB, gold_lemmas, HTML_GRN, HTML_RED),
	        ])

	    body_rows = '\n'.join(
	        '<tr>\n'
	        + '\n'.join('<td>{}</td>'.format(cell) for cell in cells)
	        + '\n</tr>'
	        for cells in table_content
	    )
	    return '\n'.join((
	        '<table>',
	        '<tr>',
	        '<td><b>id</b></td>',
	        '<td><b>Gold</b></td>',
	        '<td><b>Model A</b></td>',
	        '<td><b>Model B</b></td>',
	        '</tr>',
	        body_rows,
	        '</table>',
	    ))


	def main():
	    """Build the Gradio interface and launch it.

	    The UI has an "Upload" tab (CSV file input plus a Run button) and a
	    "Visualization" tab (color legend plus the HTML table produced by
	    ``colorize``). Clicking Run passes the uploaded file to ``colorize``
	    and shows its return value in the visualization tab.
	    """
	    # Static legend explaining the highlight colors used by colorize().
	    legend = ''.join((
	        "<b>Explanation of colors:</b>",
	        "<br><ul>",
	        "<li><b>",
	        HTML_RED.format(t='Red'),
	        "</b>: word is in generated, but not in gold.</li>",
	        "<li><b>",
	        HTML_GRN.format(t='Green'),
	        "</b>: word is in generated summary and gold.</li>",
	        "<li><b>",
	        HTML_YLW.format(t='Yellow'),
	        "</b>: word is in gold, but not in generated.</li>",
	        "<li><b>",
	        HTML_BLU.format(t='Blue'),
	        "</b>: word is in gold and in generated.</li>",
	        "</ul>",
	        "<br>",
	        "<b>Important</b>: Only nouns, verbs and proper ",
	        # No trailing </b> here: the only <b> in this sentence is already
	        # closed right after "Important".
	        "nouns are colored.",
	    ))

	    with gr.Blocks(css=TABLE_CSS) as demo:
	        gr.Markdown(
	            "After uploading, click Run and switch to the Visualization tab."
	        )
	        with gr.Tabs():
	            with gr.TabItem("Upload"):
	                data = gr.File(
	                    label='upload csv with Annotations', type='file'
	                )
	                # The button caption is given through `value`.
	                run = gr.Button(value='Run')
	            with gr.TabItem("Visualization"):
	                gr.HTML(legend)
	                viz = gr.HTML(label='Upload a csv file to start.')
	        run.click(colorize, data, viz)

	    demo.launch()


	# Start the app only when this file is executed as a script.
	if __name__ == '__main__':
	    main()