Spaces:

UlrickBL
/

benchmark_overview

Running

App Files Files Community

benchmark_overview / index.html

UlrickBL

Update index.html

0a286b0 verified about 2 months ago

raw

history blame contribute delete

11.3 kB

	<!DOCTYPE html>
	<html lang="en">
	<head>
	<meta charset="UTF-8">
	<meta name="viewport" content="width=device-width, initial-scale=1.0">
	<title>LLM Benchmark overview</title>
	<style>
	  /* ---- Page ---- */
	  body {
	    font-family: Arial, sans-serif;
	    background-color: #fdf6fb;
	    color: #333;
	    margin: 0;
	    padding: 20px;
	  }
	  h1 {
	    text-align: center;
	    color: #d16ba5;
	  }

	  /* ---- Table ---- */
	  /* Wrapper that scrolls horizontally when the table is wider than the page. */
	  .table-container {
	    overflow-x: auto;
	    margin-top: 20px;
	    position: relative;
	  }
	  table {
	    width: 100%;
	    border-collapse: collapse;
	    margin: 0 auto;
	    background-color: #fff;
	    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
	    table-layout: fixed;
	  }
	  /* Cells stay on one line and are truncated with an ellipsis. */
	  th, td {
	    padding: 10px;
	    text-align: left;
	    border: 1px solid #ddd;
	    overflow: hidden;
	    text-overflow: ellipsis;
	    white-space: nowrap;
	    position: relative;
	  }
	  th {
	    background-color: #f7d9eb;
	    color: #333;
	    font-weight: bold;
	  }
	  /* Positioning context for the absolutely positioned .resizer handle. */
	  th.resizable {
	    position: relative;
	  }
	  /* Invisible 5px drag handle on the right edge of a header cell. */
	  th.resizable .resizer {
	    position: absolute;
	    top: 0;
	    right: 0;
	    width: 5px;
	    height: 100%;
	    cursor: col-resize;
	    background-color: transparent;
	  }
	  td.expandable {
	    cursor: pointer;
	  }
	  /* Tint the second column. */
	  td:nth-child(2) {
	    background-color: #fcebf7;
	  }

	  /* ---- Filter controls ---- */
	  .filter {
	    margin-bottom: 20px;
	    text-align: center;
	  }
	  .filter label {
	    font-size: 16px;
	    margin-right: 10px;
	    color: #d16ba5;
	  }
	  .filter select {
	    padding: 5px;
	    font-size: 16px;
	    border: 1px solid #ccc;
	    border-radius: 5px;
	  }

	  /* NOTE(review): no markup or script visible in this file applies .expanded. */
	  .expanded {
	    white-space: normal;
	    background-color: #fcebf7;
	  }

	  /* ---- Modal ---- */
	  /* Centered dialog box; pre-wrap keeps line breaks in the displayed cell text. */
	  .modal {
	    position: fixed;
	    top: 50%;
	    left: 50%;
	    transform: translate(-50%, -50%);
	    background-color: #fff;
	    box-shadow: 0 4px 8px rgba(0, 0, 0, 0.2);
	    padding: 20px;
	    z-index: 1000;
	    border-radius: 10px;
	    max-width: 80%;
	    max-height: 80%;
	    overflow: auto;
	    white-space: pre-wrap;
	  }
	  /* Full-viewport dimmed backdrop, stacked just below the modal. */
	  .overlay {
	    position: fixed;
	    top: 0;
	    left: 0;
	    width: 100%;
	    height: 100%;
	    background: rgba(0, 0, 0, 0.5);
	    z-index: 999;
	  }
	</style>
	</head>
	<body>
	<h1>LLM Benchmark overview</h1>
	<div>As the development and evaluation of large language models (LLMs) continue to evolve, I have compiled an overview of the principal benchmarks commonly found in research papers. My goal is to create a clear and comprehensive resource that summarizes what is being tested in LLMs, with concrete examples, key metrics, and direct links to related papers and repositories. This document serves as a centralized matrix that will be continuously updated with insights from future papers I review.</div>
	<!-- Filter controls: a task dropdown (options are appended by script) and a free-text search box. -->
	<div class="filter">
	  <label for="metricFilter">Filter by Evaluated task:</label>
	  <select id="metricFilter">
	    <option value="">All</option>
	  </select>
	  <!-- The 20px top margin replaces an empty <h2> that was used only as a vertical spacer;
	       aria-label gives the field an accessible name, since a placeholder is not a label. -->
	  <input type="text" id="searchInput" placeholder="Search for benchmark names ..." aria-label="Search benchmark names" style="margin: 20px 0 10px; padding: 8px; width: 100%;">
	</div>
	<!-- Scrollable wrapper around the benchmark table; the script fills both sections at runtime. -->
	<div class="table-container">
	  <table id="csvTable">
	    <!-- Column headers are inserted here by script. -->
	    <thead></thead>
	    <!-- Data rows are inserted here by script. -->
	    <tbody></tbody>
	  </table>
	</div>

	<!-- Backdrop and modal box, both hidden initially. The script shows them when a table
	     cell is clicked and hides them again when the backdrop is clicked. -->
	<div class="overlay" id="overlay" style="display: none;"></div>
	<div class="modal" id="modal" style="display: none;"></div>

	<script>
	/**
	 * Parse CSV text into a header row and data rows.
	 *
	 * Handles quoted fields containing commas and line breaks, and doubled quotes
	 * ("") inside a quoted field, which produce one literal quote character.
	 * Every field is trimmed. Blank lines are skipped.
	 *
	 * @param {string} content Raw CSV text.
	 * @returns {{headers: string[], rows: string[][]}} The first record as
	 *   `headers` (an empty array when the input has no records) and the
	 *   remaining records as `rows`.
	 */
	function parseCSV(content) {
	  const rows = [];
	  let currentRow = [];
	  let currentField = '';
	  let insideQuotes = false;
	  // True once the current record has consumed any character, so that a trailing
	  // empty field on a last line without a newline (e.g. "a,b,") is not dropped.
	  let fieldPending = false;

	  // Close the current record and reset the per-record state.
	  const endRecord = () => {
	    currentRow.push(currentField.trim());
	    // A record made of one empty field is a blank line; do not emit it as a row.
	    if (currentRow.length > 1 || currentRow[0] !== '') {
	      rows.push(currentRow);
	    }
	    currentRow = [];
	    currentField = '';
	    fieldPending = false;
	  };

	  for (let i = 0; i < content.length; i++) {
	    const char = content[i];
	    if (char === '"') {
	      if (insideQuotes && content[i + 1] === '"') {
	        // Escaped quote inside a quoted field: keep one quote, skip its twin.
	        currentField += '"';
	        i++;
	      } else {
	        insideQuotes = !insideQuotes;
	      }
	      fieldPending = true;
	    } else if (char === ',' && !insideQuotes) {
	      currentRow.push(currentField.trim());
	      currentField = '';
	      fieldPending = true;
	    } else if (char === '\n' && !insideQuotes) {
	      // A '\r' from CRLF line endings is removed by trim() in endRecord.
	      endRecord();
	    } else {
	      currentField += char;
	      fieldPending = true;
	    }
	  }
	  // Flush the last record when the input does not end with a newline.
	  if (fieldPending) endRecord();

	  const headers = rows.shift() || [];
	  return { headers, rows };
	}
	/**
	 * Download a CSV file from a Hugging Face dataset repository and parse it.
	 *
	 * @param {string} dataset Dataset repository id, e.g. "user/name".
	 * @param {string} filename Path of the file on the repository's `main` revision.
	 * @param {string} [token] Access token. When omitted or empty, the request
	 *   is sent without an Authorization header.
	 * @returns {Promise<{headers: string[], rows: string[][]}>} Result of parseCSV.
	 * @throws {Error} When the HTTP response status is not OK.
	 */
	async function loadCSVFromHuggingFace(dataset, filename, token) {
	  const url = `https://huggingface.co/datasets/${dataset}/resolve/main/${filename}`;

	  // Only authenticate when a token exists; otherwise the header would carry
	  // the literal string "Bearer undefined".
	  const headers = token ? { 'Authorization': `Bearer ${token}` } : {};

	  const response = await fetch(url, { headers });

	  if (!response.ok) {
	    // statusText is empty over HTTP/2, so include the numeric status as well.
	    throw new Error(`Failed to fetch file: ${response.status} ${response.statusText}`.trim());
	  }

	  const content = await response.text();

	  return parseCSV(content);
	}
	// Handles to the static page elements that the rest of the script reads and updates.
	const metricFilter = document.querySelector('#metricFilter');
	const table = document.querySelector('#csvTable');
	const tableHead = table.querySelector('thead');
	const tableBody = table.querySelector('tbody');
	const overlay = document.querySelector('#overlay');
	const modal = document.querySelector('#modal');

	// Live search: on every keystroke, hide the body rows whose second cell does not
	// contain the typed text (case-insensitive, surrounding whitespace ignored).
	document.getElementById('searchInput').addEventListener('input', function () {
	  const needle = this.value.trim().toLowerCase();
	  const bodyRows = document.getElementById('csvTable').querySelectorAll('tbody tr');

	  for (const tr of bodyRows) {
	    const nameCell = tr.cells[1];
	    // Rows without a second cell are left untouched.
	    if (!nameCell) continue;
	    const matches = nameCell.textContent.trim().toLowerCase().includes(needle);
	    tr.style.display = matches ? '' : 'none';
	  }
	});

	/**
	 * Attach a drag handle to every <th> in the document so that its column can be
	 * resized with the mouse.
	 *
	 * Calling it again is safe: header cells that already contain a handle are
	 * skipped.
	 */
	function makeResizable() {
	  // Smallest width, in pixels, a column may be dragged to. Without a floor the
	  // computed width can go negative, which is invalid CSS and silently ignored.
	  const MIN_COLUMN_WIDTH = 40;

	  document.querySelectorAll('th').forEach(th => {
	    // Do not stack a second handle on a header that already has one.
	    if (th.querySelector('.resizer')) return;

	    const resizer = document.createElement('div');
	    resizer.classList.add('resizer');
	    th.appendChild(resizer);

	    let startX;
	    let startWidth;

	    function resizeColumn(e) {
	      const newWidth = Math.max(MIN_COLUMN_WIDTH, startWidth + (e.pageX - startX));
	      th.style.width = `${newWidth}px`;
	    }

	    function stopResize() {
	      document.removeEventListener('mousemove', resizeColumn);
	      document.removeEventListener('mouseup', stopResize);
	    }

	    resizer.addEventListener('mousedown', (e) => {
	      // Keep the drag from starting a text selection across the table.
	      e.preventDefault();
	      startX = e.pageX;
	      startWidth = th.offsetWidth;
	      // Listen on the document so the drag continues outside the 5px handle.
	      document.addEventListener('mousemove', resizeColumn);
	      document.addEventListener('mouseup', stopResize);
	    });
	  });
	}
	/**
	 * Append one <option> to the metric filter dropdown for each distinct value
	 * found in one column of the data, in first-seen order.
	 *
	 * Empty and missing values are skipped: a blank option would duplicate the
	 * built-in "All" entry, and a missing cell would show up as "undefined".
	 *
	 * @param {string[][]} data Parsed CSV rows.
	 * @param {number} headerIndex Index of the column to collect values from.
	 */
	function populateFilterOptions(data, headerIndex) {
	  const uniqueMetricTypes = [...new Set(data.map(row => row[headerIndex]))]
	    .filter(type => type !== undefined && type !== '');

	  uniqueMetricTypes.forEach(type => {
	    const option = document.createElement('option');
	    option.value = type;
	    option.textContent = type;
	    metricFilter.appendChild(option);
	  });
	}
	/**
	 * Rebuild the table header and body from parsed CSV data.
	 *
	 * Rows are filtered on one column, sorted alphabetically by their first cell,
	 * and rendered with one <td> per value. Values in the "Paper" and
	 * "HF or Git link" columns become links; all others are shown as text.
	 * Clicking any cell opens the modal with that cell's raw value.
	 *
	 * @param {string[]} headers Column titles.
	 * @param {string[][]} rows Data rows.
	 * @param {string} filterValue Keep only rows whose `headerIndex` column equals
	 *   this value; an empty string keeps every row.
	 * @param {number} headerIndex Index of the column that `filterValue` is
	 *   compared against.
	 */
	function populateTable(headers, rows, filterValue, headerIndex) {
	  tableHead.innerHTML = '';
	  tableBody.innerHTML = '';

	  const headerRow = document.createElement('tr');
	  headers.forEach(header => {
	    const th = document.createElement('th');
	    th.textContent = header;
	    th.scope = 'col';
	    th.classList.add('resizable');
	    headerRow.appendChild(th);
	  });
	  tableHead.appendChild(headerRow);

	  // Columns whose values are URLs, mapped to the link text shown in the cell.
	  const linkLabels = new Map([
	    ['Paper', 'paper link'],
	    ['HF or Git link', 'dataset link'],
	  ]);

	  rows
	    .filter(row => !filterValue || row[headerIndex] === filterValue)
	    // filter() returned a copy, so sorting does not reorder the caller's array.
	    // Rows without a first cell sort as the empty string instead of throwing.
	    .sort((a, b) => String(a[0] || '').localeCompare(String(b[0] || '')))
	    .forEach(row => {
	      const tr = document.createElement('tr');
	      row.forEach((value, index) => {
	        const td = document.createElement('td');
	        const linkLabel = value ? linkLabels.get(headers[index]) : undefined;
	        if (linkLabel) {
	          const link = document.createElement('a');
	          link.href = value;
	          link.textContent = linkLabel;
	          link.target = '_blank';
	          // The opened page must not get a handle on this window.
	          link.rel = 'noopener noreferrer';
	          td.appendChild(link);
	        } else {
	          td.textContent = value;
	        }
	        td.classList.add('expandable');
	        td.title = 'Click to expand';
	        td.addEventListener('click', () => {
	          overlay.style.display = 'block';
	          modal.style.display = 'block';
	          modal.textContent = value;
	          modal.style.whiteSpace = 'pre-wrap';
	        });
	        tr.appendChild(td);
	      });
	      tableBody.appendChild(tr);
	    });

	  makeResizable();

	  // The rows were just recreated, so any text in the search box no longer
	  // applies to them; replay the input event to hide non-matching rows again.
	  const searchInput = document.getElementById('searchInput');
	  if (searchInput && searchInput.value) {
	    searchInput.dispatchEvent(new Event('input'));
	  }
	}
	// Clicking the dimmed backdrop dismisses the modal.
	overlay.addEventListener('click', () => {
	  overlay.style.display = 'none';
	  modal.style.display = 'none';
	});
	// Pressing Escape dismisses it too, so closing does not require the mouse.
	document.addEventListener('keydown', (event) => {
	  if (event.key === 'Escape' && modal.style.display !== 'none') {
	    overlay.style.display = 'none';
	    modal.style.display = 'none';
	  }
	});
	// Re-render the table whenever a different task is chosen in the dropdown;
	// column 0 is the column the dropdown values are matched against.
	metricFilter.addEventListener('change', () => {
	  const { headers, rows } = parsedCSV;
	  populateTable(headers, rows, metricFilter.value, 0);
	});
	// Parsed CSV kept for re-rendering when the dropdown filter changes.
	let parsedCSV;
	// Initial load: fetch the dataset CSV, fill the dropdown, and render all rows.
	loadCSVFromHuggingFace(
	  'UlrickBL/benchmark_overview',
	  'benchmark_overview.csv',
	  // The `huggingface` global may be absent (e.g. when the page is opened
	  // outside the hosting environment); fall back to no token instead of throwing.
	  window.huggingface && window.huggingface.variables
	    ? window.huggingface.variables.HF_TOKEN
	    : undefined
	).then(({ headers, rows }) => {
	  parsedCSV = { headers, rows };
	  populateFilterOptions(rows, 0);
	  populateTable(headers, rows, '', 0);
	}).catch((error) => {
	  // Show the failure in the table instead of leaving it silently empty.
	  console.error(error);
	  tableBody.innerHTML = '';
	  const tr = document.createElement('tr');
	  const td = document.createElement('td');
	  td.textContent = `Could not load benchmark data: ${error.message}`;
	  tr.appendChild(td);
	  tableBody.appendChild(tr);
	});
	</script>
	</body>
	</html>