import db, { getModels } from "@/utils/db"
import Link from "next/link"

export default async function Leaderboard() {
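  // Sum of all rubric points: the maximum score a model can earn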
  const [potentialPoints] = await db`SELECT SUM(points) as total FROM rubrics`
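  // Benchmarked models, each carrying an aggregated total_score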
  const models = await getModels()

  return (
    <>
      <p>
        Traditional LLM benchmarks have drawbacks: they quickly become part of
        training datasets, and they are hard to relate to real-world use
        cases.
      </p>
      <p>
        I made this as an experiment to address these issues. Here, the
        dataset is dynamic (it changes every week) and is composed of
        crowdsourced real-world prompts.
      </p>
      <p>
        We then use GPT-4 to grade each model's response against a set of
        rubrics (more details on the about page). The prompt dataset is easily
        explorable.
      </p>
      <p>
        Everything is then stored in a Postgres database, and this page shows
        the raw results.
      </p>
      <br />
      <table style={{ maxWidth: 600 }}>
        <thead>
          <tr>
            <th width={70}>Rank</th>
            <th width={250}>Model</th>
            <th>Score (%)</th>
            <th>Results</th>
          </tr>
        </thead>
        <tbody>
          {models
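            // Hide models that haven't been graded yet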
            .filter((model) => model.total_score)
            .map((model, i) => (
              <tr key={i}>
                <td>{model.rank}</td>
                <td>{model.name}</td>
                <td>
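                  {/* Score as a percentage of the total points available */}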
                  {Math.round((model.total_score / potentialPoints.total) * 100)}
                </td>
                <td>
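                  {/* Each model page lives at /<last segment of its api_id>, lowercased */}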
                  <Link
                    href={`/${model.api_id.split("/").pop().toLowerCase()}`}
                  >
                    view
                  </Link>
                </td>
              </tr>
            ))}
        </tbody>
      </table>
    </>
  )
}