<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>LLM Benchmark overview</title>
<style>
body {
font-family: Arial, sans-serif;
background-color: #fdf6fb;
color: #333;
margin: 0;
padding: 20px;
}
h1 {
text-align: center;
color: #d16ba5;
}
.table-container {
overflow-x: auto;
margin-top: 20px;
position: relative;
}
table {
width: 100%;
border-collapse: collapse;
margin: 0 auto;
background-color: #fff;
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
table-layout: fixed;
}
th, td {
padding: 10px;
text-align: left;
border: 1px solid #ddd;
overflow: hidden;
text-overflow: ellipsis;
white-space: nowrap;
position: relative;
}
th {
background-color: #f7d9eb;
color: #333;
font-weight: bold;
}
th.resizable {
position: relative;
}
th.resizable .resizer {
position: absolute;
top: 0;
right: 0;
width: 5px;
height: 100%;
cursor: col-resize;
background-color: transparent;
}
td.expandable {
cursor: pointer;
}
td:nth-child(2) {
background-color: #fcebf7;
}
.filter {
margin-bottom: 20px;
text-align: center;
}
.filter label {
font-size: 16px;
margin-right: 10px;
color: #d16ba5;
}
.filter select {
padding: 5px;
font-size: 16px;
border: 1px solid #ccc;
border-radius: 5px;
}
.expanded {
white-space: normal;
background-color: #fcebf7;
}
.modal {
position: fixed;
top: 50%;
left: 50%;
transform: translate(-50%, -50%);
background-color: #fff;
box-shadow: 0 4px 8px rgba(0, 0, 0, 0.2);
padding: 20px;
z-index: 1000;
border-radius: 10px;
max-width: 80%;
max-height: 80%;
overflow: auto;
}
.overlay {
position: fixed;
top: 0;
left: 0;
width: 100%;
height: 100%;
background: rgba(0, 0, 0, 0.5);
z-index: 999;
white-space: pre-wrap;
}
</style>
</head>
<body>
<h1>LLM Benchmark overview</h1>
<div>As the development and evaluation of large language models (LLMs) continue to evolve, I have compiled an overview of the principal benchmarks commonly found in research papers. The goal is to provide a clear and comprehensive resource that summarizes what each benchmark tests, with concrete examples, key metrics, and direct links to the related papers and repositories. This document serves as a centralized matrix that will be continuously updated as I review new papers.</div>
<div class="filter">
<label for="metricFilter">Filter by Evaluated task:</label>
<select id="metricFilter">
<option value="">All</option>
</select>
<h2></h2>
<input type="text" id="searchInput" placeholder="Search for benchmark names..." style="margin-bottom: 10px; padding: 8px; width: 100%;">
</div>
<div class="table-container">
<table id="csvTable">
<thead>
<!-- Headers will be dynamically added -->
</thead>
<tbody>
<!-- Rows will be dynamically added here -->
</tbody>
</table>
</div>
<div class="overlay" id="overlay" style="display: none;"></div>
<div class="modal" id="modal" style="display: none;"></div>
<script>
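// Minimal CSV parser: splits fields on commas and rows on newlines,
// ignoring separators that appear inside double-quoted fields.
// Returns { headers, rows } where headers is the first CSV line.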
function parseCSV(content) {
const rows = [];
let currentRow = [];
let currentField = '';
let insideQuotes = false;
for (let i = 0; i < content.length; i++) {
const char = content[i];
if (char === '"') {
// Treat a doubled quote inside a quoted field as an escaped literal quote.
if (insideQuotes && content[i + 1] === '"') {
currentField += '"';
i++;
} else {
insideQuotes = !insideQuotes;
}
} else if (char === ',' && !insideQuotes) {
currentRow.push(currentField.trim());
currentField = '';
} else if (char === '\n' && !insideQuotes) {
currentRow.push(currentField.trim());
rows.push(currentRow);
currentRow = [];
currentField = '';
} else {
currentField += char;
}
}
// Flush the final field and row when the file does not end with a newline.
if (currentField || currentRow.length > 0) currentRow.push(currentField.trim());
if (currentRow.length > 0) rows.push(currentRow);
const headers = rows.shift();
return { headers, rows };
}
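// Download the CSV from the given Hugging Face dataset repository,
// authenticating with the supplied bearer token, then parse it with parseCSV.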
async function loadCSVFromHuggingFace(dataset, filename, token) {
const url = `https://huggingface.co/datasets/${dataset}/resolve/main/${filename}`;
const response = await fetch(url, {
headers: {
'Authorization': `Bearer ${token}`,
},
});
if (!response.ok) {
throw new Error(`Failed to fetch file: ${response.statusText}`);
}
const content = await response.text();
return parseCSV(content);
}
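// Cached references to the page elements used below.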
const metricFilter = document.getElementById('metricFilter');
const table = document.getElementById('csvTable');
const tableHead = table.querySelector('thead');
const tableBody = table.querySelector('tbody');
const overlay = document.getElementById('overlay');
const modal = document.getElementById('modal');
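// Live search: hide any row whose benchmark name (second column) does not contain the query.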
document.getElementById('searchInput').addEventListener('input', function () {
const filter = this.value.trim().toLowerCase(); // Normalize input
const rows = tableBody.querySelectorAll('tr');
rows.forEach(row => {
const nameCell = row.cells[1];
if (nameCell) {
const name = nameCell.textContent.trim().toLowerCase();
row.style.display = name.includes(filter) ? '' : 'none';
}
});
});
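// Add a thin drag handle to each header cell so columns can be resized with the mouse.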
function makeResizable() {
const thElements = document.querySelectorAll('th');
thElements.forEach(th => {
const resizer = document.createElement('div');
resizer.classList.add('resizer');
th.appendChild(resizer);
let startX;
let startWidth;
resizer.addEventListener('mousedown', (e) => {
e.preventDefault(); // avoid selecting text while dragging the handle
startX = e.pageX;
startWidth = th.offsetWidth;
document.addEventListener('mousemove', resizeColumn);
document.addEventListener('mouseup', stopResize);
});
function resizeColumn(e) {
const newWidth = startWidth + (e.pageX - startX);
th.style.width = `${newWidth}px`;
}
function stopResize() {
document.removeEventListener('mousemove', resizeColumn);
document.removeEventListener('mouseup', stopResize);
}
});
}
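// Fill the task dropdown with the unique values found in the given column.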
function populateFilterOptions(data, headerIndex) {
const uniqueMetricTypes = [...new Set(data.map(row => row[headerIndex]).filter(Boolean))];
uniqueMetricTypes.forEach(type => {
const option = document.createElement('option');
option.value = type;
option.textContent = type;
metricFilter.appendChild(option);
});
}
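// Rebuild the table: render the headers, then the rows matching the current task filter,
// sorted by the first column. 'Paper' and 'HF or Git link' cells are rendered as links;
// clicking any cell shows its full content in the modal.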
function populateTable(headers, rows, filterValue, headerIndex) {
tableHead.innerHTML = '';
tableBody.innerHTML = '';
const headerRow = document.createElement('tr');
headers.forEach(header => {
const th = document.createElement('th');
th.textContent = header;
th.classList.add('resizable');
headerRow.appendChild(th);
});
tableHead.appendChild(headerRow);
rows
.filter(row => !filterValue || row[headerIndex] === filterValue)
.sort((a, b) => (a[0] || '').localeCompare(b[0] || ''))
.forEach(row => {
const tr = document.createElement('tr');
row.forEach((value, index) => {
const td = document.createElement('td');
if (headers[index] === 'Paper' && value) {
const link = document.createElement('a');
link.href = value;
link.textContent = 'paper link';
link.target = '_blank';
td.appendChild(link);
} else if (headers[index] === 'HF or Git link' && value) {
const link = document.createElement('a');
link.href = value;
link.textContent = 'dataset link';
link.target = '_blank';
td.appendChild(link);
} else {
td.textContent = value;
}
td.classList.add('expandable');
td.title = 'Click to expand';
td.addEventListener('click', () => {
overlay.style.display = 'block';
modal.style.display = 'block';
modal.textContent = value;
modal.style.whiteSpace = 'pre-wrap';
});
tr.appendChild(td);
});
tableBody.appendChild(tr);
});
makeResizable();
}
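// Clicking the dark overlay closes both the overlay and the modal.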
overlay.addEventListener('click', () => {
overlay.style.display = 'none';
modal.style.display = 'none';
});
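// Re-render the table whenever the selected task changes.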
metricFilter.addEventListener('change', () => {
if (!parsedCSV) return; // ignore changes fired before the CSV has loaded
const filterValue = metricFilter.value;
populateTable(parsedCSV.headers, parsedCSV.rows, filterValue, 0);
});
let parsedCSV;
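// Initial load: fetch the CSV with the HF_TOKEN variable exposed on window.huggingface.variables,
// then build the filter options and the table.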
loadCSVFromHuggingFace('UlrickBL/benchmark_overview', 'benchmark_overview.csv', window.huggingface.variables.HF_TOKEN).then(({ headers, rows }) => {
parsedCSV = { headers, rows };
populateFilterOptions(rows, 0);
populateTable(headers, rows, '', 0);
}).catch(error => {
console.error('Could not load the benchmark CSV:', error);
});
</script>
</body>
</html>