<!doctype html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>LLM Benchmark overview</title>
  <style>
    /* Page-wide typography on a light pink background. */
    body {
      font-family: Arial, sans-serif;
      background-color: #fdf6fb;
      color: #333;
      margin: 0;
      padding: 20px;
    }

    h1 {
      text-align: center;
      color: #d16ba5;
    }

    /* Lets the table scroll horizontally instead of overflowing the page. */
    .table-container {
      overflow-x: auto;
      margin-top: 20px;
      position: relative;
    }

    /* Fixed layout so explicit column widths (set while dragging) are honoured. */
    table {
      width: 100%;
      border-collapse: collapse;
      margin: 0 auto;
      background-color: #fff;
      box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
      table-layout: fixed;
    }

    /* Cells are single-line and truncated with an ellipsis.
       position: relative also anchors the absolutely positioned .resizer
       handle inside header cells. */
    th, td {
      padding: 10px;
      text-align: left;
      border: 1px solid #ddd;
      overflow: hidden;
      text-overflow: ellipsis;
      white-space: nowrap;
      position: relative;
    }

    th {
      background-color: #f7d9eb;
      color: #333;
      font-weight: bold;
    }

    /* Invisible 5px drag handle on the right edge of each header cell;
       the element is created by makeResizable() in the script below. */
    th.resizable .resizer {
      position: absolute;
      top: 0;
      right: 0;
      width: 5px;
      height: 100%;
      cursor: col-resize;
      background-color: transparent;
    }

    /* Body cells open the detail modal on click. */
    td.expandable {
      cursor: pointer;
    }

    /* Highlight the second column. */
    td:nth-child(2) {
      background-color: #fcebf7;
    }

    .filter {
      margin-bottom: 20px;
      text-align: center;
    }

    .filter label {
      font-size: 16px;
      margin-right: 10px;
      color: #d16ba5;
    }

    .filter select {
      padding: 5px;
      font-size: 16px;
      border: 1px solid #ccc;
      border-radius: 5px;
    }

    /* NOTE(review): no script in this file adds the "expanded" class;
       confirm whether it is still needed. */
    .expanded {
      white-space: normal;
      background-color: #fcebf7;
    }

    /* Centered dialog showing the full text of a clicked cell.
       pre-wrap keeps line breaks from the data; the script also sets it
       inline when the modal opens. */
    .modal {
      position: fixed;
      top: 50%;
      left: 50%;
      transform: translate(-50%, -50%);
      background-color: #fff;
      box-shadow: 0 4px 8px rgba(0, 0, 0, 0.2);
      padding: 20px;
      z-index: 1000;
      border-radius: 10px;
      max-width: 80%;
      max-height: 80%;
      overflow: auto;
      white-space: pre-wrap;
    }

    /* Full-viewport dimmer shown behind the modal (one z-index step below it). */
    .overlay {
      position: fixed;
      top: 0;
      left: 0;
      width: 100%;
      height: 100%;
      background: rgba(0, 0, 0, 0.5);
      z-index: 999;
    }
  </style>
</head>
<body>
<h1>LLM Benchmark overview</h1>
<p>As the development and evaluation of large language models (LLMs) continue to evolve, I conducted an overview of the principal benchmarks commonly found in research papers. My goal is to create a clear and comprehensive resource that summarizes what is being tested in LLMs, with concrete examples, key metrics, and direct links to related papers and repositories. This document serves as a centralized matrix that will be continuously updated with insights from future papers I review.</p>
<!-- Controls: task filter (options are added by the script) and name search. -->
<div class="filter">
  <label for="metricFilter">Filter by Evaluated task:</label>
  <select id="metricFilter">
    <option value="">All</option>
  </select>
  <!-- The top margin replaces the empty heading previously used as a spacer. -->
  <div style="margin-top: 20px;">
    <label for="searchInput">Search benchmark names:</label>
    <input type="text" id="searchInput" placeholder="Search for benchmark names ..." style="margin-bottom: 10px; padding: 8px; width: 100%; box-sizing: border-box;">
  </div>
</div>
<div class="table-container">
  <table id="csvTable">
    <thead>
      <!-- Headers will be dynamically added -->
    </thead>
    <tbody>
      <!-- Rows will be dynamically added here -->
    </tbody>
  </table>
</div>
<!-- Both start hidden; the script toggles their inline display value. -->
<div class="overlay" id="overlay" style="display: none;"></div>
<div class="modal" id="modal" style="display: none;"></div>
<script>
/**
 * Parse CSV text into a header row and data rows.
 *
 * Supports quoted fields containing commas and newlines, and the RFC 4180
 * escape where a doubled quote ("") inside a quoted field is a literal quote.
 * Every field is trimmed. Blank lines are skipped. A trailing empty field
 * (line ending in a comma) is kept so rows stay aligned with the headers.
 *
 * @param {string} content Raw CSV text.
 * @returns {{headers: string[], rows: string[][]}} The first parsed row as
 *   `headers` (an empty array when there is no data) and the rest as `rows`.
 */
function parseCSV(content) {
  const rows = [];
  let currentRow = [];
  let currentField = '';
  let insideQuotes = false;
  // Distinguishes an explicitly quoted empty field ("") from a blank line.
  let rowHasQuotedField = false;

  const endField = () => {
    currentRow.push(currentField.trim());
    currentField = '';
  };

  const endRow = () => {
    endField();
    const isBlankLine =
      currentRow.length === 1 && currentRow[0] === '' && !rowHasQuotedField;
    if (!isBlankLine) rows.push(currentRow);
    currentRow = [];
    rowHasQuotedField = false;
  };

  for (let i = 0; i < content.length; i++) {
    const char = content[i];
    if (char === '"') {
      if (insideQuotes && content[i + 1] === '"') {
        // Escaped quote inside a quoted field: emit one quote, skip its pair.
        currentField += '"';
        i++;
      } else {
        insideQuotes = !insideQuotes;
        rowHasQuotedField = true;
      }
    } else if (insideQuotes) {
      // Delimiters and newlines are literal text while inside quotes.
      currentField += char;
    } else if (char === ',') {
      endField();
    } else if (char === '\n') {
      // A preceding "\r" from CRLF input is removed by the field trim.
      endRow();
    } else {
      currentField += char;
    }
  }

  // Flush the last line; this is a no-op when the text ends with a newline.
  endRow();

  const headers = rows.shift() || [];
  return { headers, rows };
}
/**
 * Download a CSV file from a Hugging Face dataset repository and parse it.
 *
 * @param {string} dataset Dataset id in "owner/name" form.
 * @param {string} filename Path of the file on the repository's main branch.
 * @param {string} [token] Access token; when falsy no Authorization header
 *   is sent, so the request is made anonymously.
 * @returns {Promise<{headers: string[], rows: string[][]}>} Result of parseCSV.
 * @throws {Error} When the HTTP response status is not OK.
 */
async function loadCSVFromHuggingFace(dataset, filename, token) {
  const url = `https://huggingface.co/datasets/${dataset}/resolve/main/${filename}`;

  // Only attach credentials when a token exists; otherwise the header would
  // literally read "Bearer undefined".
  const headers = {};
  if (token) {
    headers['Authorization'] = `Bearer ${token}`;
  }

  const response = await fetch(url, { headers });
  if (!response.ok) {
    // statusText can be empty, so include the numeric status as well.
    throw new Error(`Failed to fetch file: ${response.status} ${response.statusText}`.trim());
  }

  const content = await response.text();
  return parseCSV(content);
}
// Cached references to the elements the rest of the script reads and updates.
const metricFilter = document.getElementById('metricFilter');
const table = document.getElementById('csvTable');
const tableHead = table.querySelector('thead');
const tableBody = table.querySelector('tbody');
const overlay = document.getElementById('overlay');
const modal = document.getElementById('modal');

// Live search: on every keystroke, hide each body row whose second cell does
// not contain the query (case-insensitive substring match).
// NOTE(review): index 1 is presumably the benchmark-name column; confirm
// against the CSV column order.
document.getElementById('searchInput').addEventListener('input', function () {
  const query = this.value.trim().toLowerCase();
  Array.from(tableBody.rows).forEach(row => {
    const nameCell = row.cells[1];
    if (!nameCell) return;
    const name = nameCell.textContent.trim().toLowerCase();
    row.style.display = name.includes(query) ? '' : 'none';
  });
});
/**
 * Attach a drag handle to every header cell so its column can be resized
 * with the mouse.
 *
 * Safe to call repeatedly: header cells that already contain a handle are
 * skipped.
 */
function makeResizable() {
  // Smallest width a column may be dragged to, so it cannot collapse.
  const MIN_COLUMN_WIDTH = 30;

  document.querySelectorAll('th').forEach(th => {
    if (th.querySelector('.resizer')) return;

    const resizer = document.createElement('div');
    resizer.classList.add('resizer');
    th.appendChild(resizer);

    let startX;
    let startWidth;

    function resizeColumn(e) {
      const newWidth = Math.max(MIN_COLUMN_WIDTH, startWidth + (e.pageX - startX));
      th.style.width = `${newWidth}px`;
    }

    function stopResize() {
      document.removeEventListener('mousemove', resizeColumn);
      document.removeEventListener('mouseup', stopResize);
    }

    resizer.addEventListener('mousedown', (e) => {
      // Stop the browser from starting a text selection while dragging.
      e.preventDefault();
      startX = e.pageX;
      startWidth = th.offsetWidth;
      // Listen on the document so the drag continues outside the thin handle.
      document.addEventListener('mousemove', resizeColumn);
      document.addEventListener('mouseup', stopResize);
    });
  });
}
/**
 * Add one <option> to the task filter for each distinct value found in the
 * given column of the data rows.
 *
 * Blank or missing values are skipped (an empty value would duplicate the
 * "All" option), as are values already present in the dropdown.
 *
 * @param {string[][]} data Parsed CSV rows.
 * @param {number} headerIndex Index of the column to collect values from.
 */
function populateFilterOptions(data, headerIndex) {
  const existingValues = new Set(Array.from(metricFilter.options, option => option.value));
  const newValues = new Set(
    data
      .map(row => row[headerIndex])
      .filter(value => value && !existingValues.has(value))
  );

  newValues.forEach(type => {
    const option = document.createElement('option');
    option.value = type;
    option.textContent = type;
    metricFilter.appendChild(option);
  });
}
/**
 * Rebuild the table header and body from parsed CSV data.
 *
 * Rows are optionally filtered by an exact match on one column, then sorted
 * by their first column. Cells in the "Paper" and "HF or Git link" columns
 * are rendered as links; every cell opens the detail modal with its raw
 * value when clicked.
 *
 * @param {string[]} headers Column titles.
 * @param {string[][]} rows Data rows.
 * @param {string} filterValue Value to match; falsy shows all rows.
 * @param {number} headerIndex Index of the column compared to filterValue.
 */
function populateTable(headers, rows, filterValue, headerIndex) {
  // Header text of link columns mapped to the label shown for the link.
  const linkLabels = new Map([
    ['Paper', 'paper link'],
    ['HF or Git link', 'dataset link'],
  ]);

  // True when value parses as an http(s) URL. The values come from an
  // external CSV, so other schemes (e.g. javascript:) must not become hrefs.
  function isHttpUrl(value) {
    try {
      const parsed = new URL(value, document.baseURI);
      return parsed.protocol === 'http:' || parsed.protocol === 'https:';
    } catch (err) {
      return false;
    }
  }

  // Show the overlay and the modal with the given text, keeping line breaks.
  function showInModal(text) {
    overlay.style.display = 'block';
    modal.style.display = 'block';
    modal.textContent = text;
    modal.style.whiteSpace = 'pre-wrap';
  }

  tableHead.innerHTML = '';
  tableBody.innerHTML = '';

  const headerRow = document.createElement('tr');
  headers.forEach(header => {
    const th = document.createElement('th');
    th.textContent = header;
    th.scope = 'col';
    th.classList.add('resizable');
    headerRow.appendChild(th);
  });
  tableHead.appendChild(headerRow);

  rows
    .filter(row => !filterValue || row[headerIndex] === filterValue)
    // filter() returned a copy, so sorting does not reorder the caller's array.
    .sort((a, b) => String(a[0] || '').localeCompare(String(b[0] || '')))
    .forEach(row => {
      const tr = document.createElement('tr');
      row.forEach((value, index) => {
        const td = document.createElement('td');
        const linkLabel = linkLabels.get(headers[index]);
        if (linkLabel && value && isHttpUrl(value)) {
          const link = document.createElement('a');
          link.href = value;
          link.textContent = linkLabel;
          link.target = '_blank';
          // Prevent the opened page from reaching back via window.opener.
          link.rel = 'noopener noreferrer';
          td.appendChild(link);
        } else {
          td.textContent = value;
        }
        td.classList.add('expandable');
        td.title = 'Click to expand';
        td.addEventListener('click', () => showInModal(value));
        tr.appendChild(td);
      });
      tableBody.appendChild(tr);
    });

  makeResizable();

  // Rebuilding the rows discards the per-row hiding done by the search box;
  // re-run its input handler so a typed query still applies.
  const searchInput = document.getElementById('searchInput');
  if (searchInput && searchInput.value) {
    searchInput.dispatchEvent(new Event('input'));
  }
}
// Column whose values drive the task filter dropdown.
const FILTER_COLUMN_INDEX = 0;

// Parsed CSV data; stays undefined until the download completes.
let parsedCSV;

// Hide the detail modal and the dimmer behind it.
const hideModal = () => {
  overlay.style.display = 'none';
  modal.style.display = 'none';
};

overlay.addEventListener('click', hideModal);

// Keyboard users can dismiss the modal with Escape.
document.addEventListener('keydown', (event) => {
  if (event.key === 'Escape' && modal.style.display !== 'none') {
    hideModal();
  }
});

metricFilter.addEventListener('change', () => {
  // Ignore changes that arrive before the data has loaded.
  if (!parsedCSV) return;
  populateTable(parsedCSV.headers, parsedCSV.rows, metricFilter.value, FILTER_COLUMN_INDEX);
});

// window.huggingface may be absent (e.g. when the page is opened outside the
// hosting environment); fall back to no token instead of throwing.
// NOTE(review): a token read from a page-level variable is visible to every
// visitor; confirm it is scoped read-only or not needed at all.
const hfVariables = (window.huggingface && window.huggingface.variables) || {};

loadCSVFromHuggingFace('UlrickBL/benchmark_overview', 'benchmark_overview.csv', hfVariables.HF_TOKEN)
  .then(({ headers, rows }) => {
    parsedCSV = { headers, rows };
    populateFilterOptions(rows, FILTER_COLUMN_INDEX);
    populateTable(headers, rows, '', FILTER_COLUMN_INDEX);
  })
  .catch((error) => {
    // Surface the failure instead of leaving a silently empty table.
    console.error(error);
    tableBody.innerHTML = '';
    const tr = document.createElement('tr');
    const td = document.createElement('td');
    td.textContent = `Could not load benchmark data: ${error.message}`;
    tr.appendChild(td);
    tableBody.appendChild(tr);
  });
</script>
</body>
</html>