Spaces:
Running
Running
<script lang="ts"> | |
import fileSaver from 'file-saver';
const { saveAs } = fileSaver;

import { onMount, getContext } from 'svelte';

import dayjs from 'dayjs';
import relativeTime from 'dayjs/plugin/relativeTime';
// Adds `fromNow()` to dayjs instances; the feedback table uses it for "Updated At".
dayjs.extend(relativeTime);

import * as ort from 'onnxruntime-web';
import { AutoModel, AutoTokenizer } from '@huggingface/transformers';

// Model id passed to `AutoTokenizer.from_pretrained` / `AutoModel.from_pretrained`.
const EMBEDDING_MODEL = 'TaylorAI/bge-micro-v2';
// Both stay null until `loadEmbeddingModel` has assigned them.
let tokenizer = null;
let model = null;

import { models } from '$lib/stores';
import { deleteFeedbackById, exportAllFeedbacks, getAllFeedbacks } from '$lib/apis/evaluations';

import FeedbackMenu from './Evaluations/FeedbackMenu.svelte';
import EllipsisHorizontal from '../icons/EllipsisHorizontal.svelte';
import Tooltip from '../common/Tooltip.svelte';
import Badge from '../common/Badge.svelte';
import Pagination from '../common/Pagination.svelte';
import MagnifyingGlass from '../icons/MagnifyingGlass.svelte';
import Share from '../icons/Share.svelte';
import CloudArrowUp from '../icons/CloudArrowUp.svelte';
import { toast } from 'svelte-sonner';
import Spinner from '../common/Spinner.svelte';
import DocumentArrowUpSolid from '../icons/DocumentArrowUpSolid.svelte';
import DocumentArrowDown from '../icons/DocumentArrowDown.svelte';
import ArrowDownTray from '../icons/ArrowDownTray.svelte';

const i18n = getContext('i18n');

// Leaderboard rows; reassigned by `rankHandler`.
let rankedModels = [];
// Feedback records; filled in `onMount`, filtered on delete.
let feedbacks = [];
// Bound to the search input; every change triggers `debouncedQueryHandler`.
let query = '';
// 1-based page of the feedback history table (10 rows per page).
let page = 1;
// Cache of tag -> embedding, filled lazily by `getTagEmbeddings`.
let tagEmbeddings = new Map();
// Set once the initial feedback fetch has finished; gates the whole template.
let loaded = false;
// True while a re-rank is pending; dims the table and shows the spinner.
let loadingLeaderboard = true;
// Handle of the pending search debounce timer.
let debounceTimer;

// Rows of the feedback table for the current page.
$: paginatedFeedbacks = feedbacks.slice((page - 1) * 10, page * 10);
/** One feedback record as consumed by the leaderboard and the history table. */
type Feedback = {
  id: string;
  data: {
    /** `1` is counted as a win and `-1` as a loss; the table renders `0` as a draw. */
    rating: number;
    /** The model the rating applies to. */
    model_id: string;
    /** Models treated as opponents of `model_id` when rating; may be null. */
    sibling_model_ids: string[] | null;
    reason: string;
    comment: string;
    /** Topic tags matched against the search query (callers tolerate a missing value). */
    tags: string[];
  };
  user: {
    name: string;
    profile_image_url: string;
  };
  /** Multiplied by 1000 before being passed to dayjs — presumably Unix seconds; confirm with the API. */
  updated_at: number;
};

/** Running Elo rating and win/loss tally for a single model. */
type ModelStats = {
  rating: number;
  won: number;
  lost: number;
};
//////////////////////
//
// Rank models by Elo rating
//
//////////////////////

/**
 * Rebuilds `rankedModels` from the `$models` store and the current feedbacks.
 *
 * Models owned by `arena` or flagged `info.meta.hidden` are left out. Each
 * remaining model gets a rounded rating plus win/loss figures; models without
 * any stats get `'-'` placeholders. Rated models come first (highest rating
 * first), unrated models follow in name order. Clears `loadingLeaderboard`.
 *
 * @param similarities feedback id -> similarity weight, forwarded to
 *   `calculateModelStats`; defaults to an empty map.
 */
const rankHandler = async (similarities: Map<string, number> = new Map()) => {
  const modelStats = calculateModelStats(feedbacks, similarities);

  rankedModels = $models
    .filter((m) => m?.owned_by !== 'arena' && (m?.info?.meta?.hidden ?? false) !== true)
    // `entry` rather than `model`: avoids shadowing the module-level embedding model.
    .map((entry) => {
      const stats = modelStats.get(entry.id);
      return {
        ...entry,
        rating: stats ? Math.round(stats.rating) : '-',
        stats: {
          count: stats ? stats.won + stats.lost : 0,
          // Kept as strings: the template compares them against '-'.
          won: stats ? stats.won.toString() : '-',
          lost: stats ? stats.lost.toString() : '-'
        }
      };
    })
    .sort((a, b) => {
      const ratingA = a.rating;
      const ratingB = b.rating;
      // Narrow with typeof so the subtraction only ever sees numbers.
      if (typeof ratingA === 'number' && typeof ratingB === 'number') return ratingB - ratingA;
      if (typeof ratingA === 'number') return -1;
      if (typeof ratingB === 'number') return 1;
      return (a.name ?? '').localeCompare(b.name ?? '');
    });

  loadingLeaderboard = false;
};
/**
 * Replays the feedbacks in order as pairwise Elo matches and returns the
 * resulting per-model stats.
 *
 * A feedback with rating `1` is a win for `data.model_id` against every id in
 * `data.sibling_model_ids`; rating `-1` is a loss. Any other rating is skipped.
 * Every model starts at 1000 and only appears in the result once it has played
 * at least one match.
 *
 * When the module-level `query` is non-blank, each rating change is scaled by
 * the feedback's entry in `similarities` (0 when missing). Win/loss counters
 * are incremented regardless of the weight.
 *
 * @param feedbacks feedback records, processed in array order.
 * @param similarities feedback id -> weight applied to the rating change.
 * @returns model id -> accumulated rating and win/loss counts.
 */
function calculateModelStats(
  feedbacks: Feedback[],
  similarities: Map<string, number>
): Map<string, ModelStats> {
  const stats = new Map<string, ModelStats>();
  const K = 32;
  // The search handler treats a whitespace-only query as "no query" and passes
  // no similarities; apply the same rule here so ratings are not all zeroed.
  const hasQuery = query.trim() !== '';

  // Returns the stored stats, or a detached default that is NOT yet in the map.
  function getOrDefaultStats(modelId: string): ModelStats {
    return stats.get(modelId) || { rating: 1000, won: 0, lost: 0 };
  }

  function updateStats(modelId: string, ratingChange: number, outcome: number) {
    const currentStats = getOrDefaultStats(modelId);
    currentStats.rating += ratingChange;
    if (outcome === 1) currentStats.won++;
    else if (outcome === 0) currentStats.lost++;
    stats.set(modelId, currentStats);
  }

  function calculateEloChange(
    ratingA: number,
    ratingB: number,
    outcome: number,
    similarity: number
  ): number {
    const expectedScore = 1 / (1 + Math.pow(10, (ratingB - ratingA) / 400));
    return K * (outcome - expectedScore) * similarity;
  }

  for (const feedback of feedbacks) {
    const modelA = feedback.data.model_id;

    let outcome: number;
    // String() instead of .toString() so a null/undefined rating is skipped, not thrown on.
    switch (String(feedback.data.rating)) {
      case '1':
        outcome = 1;
        break;
      case '-1':
        outcome = 0;
        break;
      default:
        continue; // Skip invalid ratings
    }

    // `|| 0` also maps a NaN similarity to 0.
    const similarity = hasQuery ? similarities.get(feedback.id) || 0 : 1;

    for (const modelB of feedback.data.sibling_model_ids || []) {
      // Re-read both ratings for every match: a model seen for the first time
      // is only stored by updateStats, so a snapshot taken before the loop
      // would stay at the default for all later opponents of this feedback.
      const ratingA = getOrDefaultStats(modelA).rating;
      const ratingB = getOrDefaultStats(modelB).rating;

      const changeA = calculateEloChange(ratingA, ratingB, outcome, similarity);
      const changeB = calculateEloChange(ratingB, ratingA, 1 - outcome, similarity);

      updateStats(modelA, changeA, outcome);
      updateStats(modelB, changeB, 1 - outcome);
    }
  }

  return stats;
}
//////////////////////
//
// Calculate cosine similarity
//
//////////////////////

/**
 * Cosine similarity of two equal-length numeric vectors.
 *
 * @param vecA first vector (plain or typed array).
 * @param vecB second vector; must have the same length as `vecA`.
 * @returns dot(A, B) / (|A| * |B|), or 0 when that denominator is 0.
 * @throws Error when the vectors differ in length.
 */
const cosineSimilarity = (vecA: ArrayLike<number>, vecB: ArrayLike<number>): number => {
  if (vecA.length !== vecB.length) {
    throw new Error('Vectors must be the same length');
  }

  let dotProduct = 0;
  let sumSquaresA = 0;
  let sumSquaresB = 0;
  for (let i = 0; i < vecA.length; i++) {
    dotProduct += vecA[i] * vecB[i];
    sumSquaresA += vecA[i] ** 2;
    sumSquaresB += vecB[i] ** 2;
  }

  const denominator = Math.sqrt(sumSquaresA) * Math.sqrt(sumSquaresB);

  // Checking the product covers a zero vector on either side as well as a
  // product that underflows to 0, so the division below can never be by zero.
  if (denominator === 0) {
    return 0;
  }

  return dotProduct / denominator;
};
/**
 * Highest cosine similarity between the query embedding and any tag embedding.
 *
 * The running maximum starts at 0, so the result is never negative and an
 * empty map yields 0.
 *
 * @param queryEmbedding embedding of the search query.
 * @param tagEmbeddings tag -> embedding (plain or typed array).
 */
const calculateMaxSimilarity = (
  queryEmbedding: ArrayLike<number>,
  tagEmbeddings: Map<string, ArrayLike<number>>
): number => {
  let maxSimilarity = 0;
  for (const tagEmbedding of tagEmbeddings.values()) {
    maxSimilarity = Math.max(maxSimilarity, cosineSimilarity(queryEmbedding, tagEmbedding));
  }
  return maxSimilarity;
};
//////////////////////
//
// Embedding functions
//
//////////////////////

// Shared by all callers so overlapping calls (e.g. repeated focus events while
// the download is still running) do not start a second load.
let embeddingModelLoading: Promise<void> | null = null;

/**
 * Ensures the tokenizer and model for `EMBEDDING_MODEL` are available in the
 * module-level `tokenizer` / `model` variables, then pre-computes embeddings
 * for every tag found in `feedbacks`.
 *
 * Loaded instances are stored on `window` and reused from there when present.
 * A failed load rejects and is forgotten, so a later call tries again.
 */
const loadEmbeddingModel = async () => {
  if (embeddingModelLoading === null) {
    const loading = (async () => {
      const cache = window as Window & { tokenizer?: unknown; model?: unknown };

      if (!cache.tokenizer) {
        cache.tokenizer = await AutoTokenizer.from_pretrained(EMBEDDING_MODEL);
      }
      if (!cache.model) {
        cache.model = await AutoModel.from_pretrained(EMBEDDING_MODEL);
      }

      tokenizer = cache.tokenizer;
      model = cache.model;
    })();

    embeddingModelLoading = loading;
    // Drop a failed attempt so the next call can retry; the rejection itself
    // still reaches whoever awaits below.
    loading.catch(() => {
      if (embeddingModelLoading === loading) embeddingModelLoading = null;
    });
  }

  await embeddingModelLoading;

  // Pre-compute embeddings for all unique tags
  const allTags = new Set(feedbacks.flatMap((feedback) => feedback.data.tags || []));
  await getTagEmbeddings(Array.from(allTags));
};
/**
 * Embeds `text` with the loaded tokenizer and model.
 *
 * @param text the string to embed.
 * @returns the raw data of the mean-pooled last hidden state.
 * @throws Error when the tokenizer or model has not been loaded yet.
 */
const getEmbeddings = async (text: string) => {
  // Fail with a readable message instead of "tokenizer is not a function".
  if (!tokenizer || !model) {
    throw new Error('Embedding model is not loaded');
  }

  const tokens = await tokenizer(text);
  const output = await model(tokens);

  // Perform mean pooling on the last hidden states
  const pooled = output.last_hidden_state.mean(1);
  return pooled.ort_tensor.data;
};
/**
 * Returns the embedding of every tag in `tags`.
 *
 * Tags missing from the module-level `tagEmbeddings` cache are embedded one at
 * a time and stored there; cached tags are reused.
 *
 * @param tags tag names; duplicates collapse into one entry, a missing value
 *   is treated as an empty list.
 * @returns tag -> embedding for the requested tags only.
 */
const getTagEmbeddings = async (tags: string[]) => {
  const embeddings = new Map<string, ArrayLike<number>>();

  for (const tag of tags ?? []) {
    if (!tagEmbeddings.has(tag)) {
      tagEmbeddings.set(tag, await getEmbeddings(tag));
    }
    embeddings.set(tag, tagEmbeddings.get(tag));
  }

  return embeddings;
};
/**
 * Re-ranks the leaderboard for the current `query`.
 *
 * A blank query ranks immediately without similarity weighting. Otherwise the
 * work is debounced by 1.5 s: the query is embedded, every feedback gets the
 * best similarity between the query and its tags, and the result is handed to
 * `rankHandler`. Any failure is reported with a toast and clears the loading
 * state.
 */
const debouncedQueryHandler = async () => {
  loadingLeaderboard = true;

  // Cancel first, in every branch: a timer left over from an earlier keystroke
  // must not fire after the query has been cleared.
  clearTimeout(debounceTimer);

  if (query.trim() === '') {
    rankHandler();
    return;
  }

  const requestedQuery = query;

  debounceTimer = setTimeout(async () => {
    try {
      // The user may have typed before the load started on focus has finished;
      // this reuses the instances cached on `window` once they exist.
      await loadEmbeddingModel();

      const queryEmbedding = await getEmbeddings(requestedQuery);
      const similarities = new Map<string, number>();

      for (const feedback of feedbacks) {
        const embeddingsForTags = await getTagEmbeddings(feedback.data.tags || []);
        similarities.set(feedback.id, calculateMaxSimilarity(queryEmbedding, embeddingsForTags));
      }

      // The query changed while embeddings were computed; the newer call owns
      // the leaderboard and the loading flag now.
      if (requestedQuery !== query) return;

      rankHandler(similarities);
    } catch (err) {
      console.error(err);
      toast.error(err instanceof Error ? err.message : String(err));
      loadingLeaderboard = false;
    }
  }, 1500); // Debounce for 1.5 seconds
};

$: query, debouncedQueryHandler();
//////////////////////
//
// CRUD operations
//
//////////////////////

/**
 * Deletes one feedback through the API and, on success, removes it from the
 * local list, keeps the current page in range and re-ranks the leaderboard.
 * An API error is shown as a toast and leaves the list untouched.
 *
 * @param feedbackId id of the feedback to delete.
 */
const deleteFeedbackHandler = async (feedbackId: string) => {
  const response = await deleteFeedbackById(localStorage.token, feedbackId).catch((err) => {
    toast.error(err);
    return null;
  });

  if (response) {
    feedbacks = feedbacks.filter((f) => f.id !== feedbackId);

    // Deleting the only row of the last page would otherwise leave an empty
    // table (and no pagination control once 10 or fewer rows remain).
    const lastPage = Math.max(1, Math.ceil(feedbacks.length / 10));
    if (page > lastPage) {
      page = lastPage;
    }

    // The ratings are derived from `feedbacks`, so recompute them; this goes
    // through the query handler so an active search keeps its weighting.
    debouncedQueryHandler();
  }
};
/**
 * Opens the OpenWebUI Community leaderboard in a new tab and, once that page
 * posts `'loaded'` back, sends it the feedback history as a JSON string.
 * The `snapshot` and `user` fields are stripped from every feedback first.
 */
const shareHandler = async () => {
  const url = 'https://openwebui.com';

  // remove snapshot and user from feedbacks
  const feedbacksToShare = feedbacks.map((f) => {
    const { snapshot, user, ...rest } = f;
    return rest;
  });

  // window.open returns null when the browser blocks the pop-up.
  const tab = window.open(`${url}/leaderboard`, '_blank');
  if (!tab) {
    toast.error($i18n.t('Failed to open OpenWebUI Community. Please allow pop-ups and try again.'));
    return;
  }

  toast.success($i18n.t('Redirecting you to OpenWebUI Community'));

  // Define the event handler function
  const messageHandler = (event: MessageEvent) => {
    if (event.origin !== url) return;
    if (event.data === 'loaded') {
      // Restrict delivery to the community origin: with '*' the payload would
      // go to whatever page the tab shows at that moment.
      tab.postMessage(JSON.stringify(feedbacksToShare), url);
      // Remove the event listener after handling the message
      window.removeEventListener('message', messageHandler);
    }
  };

  window.addEventListener('message', messageHandler, false);
};
/**
 * Fetches all feedbacks through the export API and offers them as a JSON
 * download named `feedback-history-export-<timestamp>.json`.
 * An API error is shown as a toast and nothing is downloaded.
 */
const exportHandler = async () => {
  const _feedbacks = await exportAllFeedbacks(localStorage.token).catch((err) => {
    toast.error(err);
    return null;
  });

  if (_feedbacks) {
    const blob = new Blob([JSON.stringify(_feedbacks)], {
      type: 'application/json'
    });
    saveAs(blob, `feedback-history-export-${Date.now()}.json`);
  }
};
// Initial load: fetch the feedback history, reveal the page, rank the models.
onMount(async () => {
  // Same error style as the other handlers: toast, then fall back to an empty
  // list so the page still renders instead of staying blank.
  const fetched = await getAllFeedbacks(localStorage.token).catch((err) => {
    toast.error(err);
    return null;
  });

  feedbacks = fetched ?? [];
  loaded = true;
  rankHandler();
});
</script> | |
{#if loaded} | |
<!-- Leaderboard header: title, model count and the topic search box -->
<div class="mt-0.5 mb-2 gap-1 flex flex-col md:flex-row justify-between">
  <div class="flex md:self-center text-lg font-medium px-0.5 shrink-0 items-center">
    <div class=" gap-1">
      {$i18n.t('Leaderboard')}
    </div>
    <div class="flex self-center w-[1px] h-6 mx-2.5 bg-gray-50 dark:bg-gray-850" />
    <span class="text-lg font-medium text-gray-500 dark:text-gray-300 mr-1.5"
      >{rankedModels.length}</span
    >
  </div>
  <div class=" flex space-x-2">
    <Tooltip content={$i18n.t('Re-rank models by topic similarity')}>
      <div class="flex flex-1">
        <div class=" self-center ml-1 mr-3">
          <MagnifyingGlass className="size-3" />
        </div>
        <!-- Focusing the input starts loading the embedding model -->
        <input
          class=" w-full text-sm pr-4 py-1 rounded-r-xl outline-none bg-transparent"
          bind:value={query}
          placeholder={$i18n.t('Search')}
          on:focus={() => {
            loadEmbeddingModel();
          }}
        />
      </div>
    </Tooltip>
  </div>
</div>
<!-- Leaderboard table; dimmed behind a spinner while a re-rank is pending -->
<div
  class="scrollbar-hidden relative whitespace-nowrap overflow-x-auto max-w-full rounded pt-0.5"
>
  {#if loadingLeaderboard}
    <div class=" absolute top-0 bottom-0 left-0 right-0 flex">
      <div class="m-auto">
        <Spinner />
      </div>
    </div>
  {/if}
  {#if (rankedModels ?? []).length === 0}
    <div class="text-center text-xs text-gray-500 dark:text-gray-400 py-1">
      {$i18n.t('No models found')}
    </div>
  {:else}
    <table
      class="w-full text-sm text-left text-gray-500 dark:text-gray-400 table-auto max-w-full rounded {loadingLeaderboard
        ? 'opacity-20'
        : ''}"
    >
      <thead
        class="text-xs text-gray-700 uppercase bg-gray-50 dark:bg-gray-850 dark:text-gray-400 -translate-y-0.5"
      >
        <tr class="">
          <th scope="col" class="px-3 py-1.5 cursor-pointer select-none w-3">
            {$i18n.t('RK')}
          </th>
          <th scope="col" class="px-3 py-1.5 cursor-pointer select-none">
            {$i18n.t('Model')}
          </th>
          <th scope="col" class="px-3 py-1.5 text-right cursor-pointer select-none w-fit">
            {$i18n.t('Rating')}
          </th>
          <th scope="col" class="px-3 py-1.5 text-right cursor-pointer select-none w-5">
            {$i18n.t('Won')}
          </th>
          <th scope="col" class="px-3 py-1.5 text-right cursor-pointer select-none w-5">
            {$i18n.t('Lost')}
          </th>
        </tr>
      </thead>
      <tbody class="">
        {#each rankedModels as model, modelIdx (model.id)}
          <tr class="bg-white dark:bg-gray-900 dark:border-gray-850 text-xs group">
            <td class="px-3 py-1.5 text-left font-medium text-gray-900 dark:text-white w-fit">
              <div class=" line-clamp-1">
                {model?.rating !== '-' ? modelIdx + 1 : '-'}
              </div>
            </td>
            <td class="px-3 py-1.5 flex flex-col justify-center">
              <div class="flex items-center gap-2">
                <div class="flex-shrink-0">
                  <img
                    src={model?.info?.meta?.profile_image_url ?? '/favicon.png'}
                    alt={model.name}
                    class="size-5 rounded-full object-cover shrink-0"
                  />
                </div>
                <div class="font-medium text-gray-800 dark:text-gray-200 pr-4">
                  {model.name}
                </div>
              </div>
            </td>
            <td class="px-3 py-1.5 text-right font-medium text-gray-900 dark:text-white w-max">
              {model.rating}
            </td>
            <!-- Won / Lost cells show the count, and the share of all matches on row hover.
                 The counts are strings, hence the explicit Number() before dividing. -->
            <td class=" px-3 py-1.5 text-right font-semibold text-green-500">
              <div class=" w-10">
                {#if model.stats.won === '-'}
                  -
                {:else}
                  <span class="hidden group-hover:inline"
                    >{((Number(model.stats.won) / model.stats.count) * 100).toFixed(1)}%</span
                  >
                  <span class=" group-hover:hidden">{model.stats.won}</span>
                {/if}
              </div>
            </td>
            <td class="px-3 py-1.5 text-right font-semibold text-red-500">
              <div class=" w-10">
                {#if model.stats.lost === '-'}
                  -
                {:else}
                  <span class="hidden group-hover:inline"
                    >{((Number(model.stats.lost) / model.stats.count) * 100).toFixed(1)}%</span
                  >
                  <span class=" group-hover:hidden">{model.stats.lost}</span>
                {/if}
              </div>
            </td>
          </tr>
        {/each}
      </tbody>
    </table>
  {/if}
</div>
<div class=" text-gray-500 text-xs mt-1.5 w-full flex justify-end">
  <div class=" text-right">
    <div class="line-clamp-1">
      ⓘ {$i18n.t(
        'The evaluation leaderboard is based on the Elo rating system and is updated in real-time.'
      )}
    </div>
    {$i18n.t(
      'The leaderboard is currently in beta, and we may adjust the rating calculations as we refine the algorithm.'
    )}
  </div>
</div>
<div class="pb-4"></div>
<!-- Feedback history header: title, total count and the export button -->
<div class="mt-0.5 mb-2 gap-1 flex flex-col md:flex-row justify-between">
  <div class="flex md:self-center text-lg font-medium px-0.5">
    {$i18n.t('Feedback History')}
    <div class="flex self-center w-[1px] h-6 mx-2.5 bg-gray-50 dark:bg-gray-850" />
    <span class="text-lg font-medium text-gray-500 dark:text-gray-300">{feedbacks.length}</span>
  </div>
  <div>
    <div>
      <Tooltip content={$i18n.t('Export')}>
        <button
          class=" p-2 rounded-xl hover:bg-gray-100 dark:bg-gray-900 dark:hover:bg-gray-850 transition font-medium text-sm flex items-center space-x-1"
          on:click={() => {
            exportHandler();
          }}
        >
          <ArrowDownTray className="size-3" />
        </button>
      </Tooltip>
    </div>
  </div>
</div>
<!-- Feedback history table: one row per feedback on the current page -->
<div
  class="scrollbar-hidden relative whitespace-nowrap overflow-x-auto max-w-full rounded pt-0.5"
>
  {#if (feedbacks ?? []).length === 0}
    <div class="text-center text-xs text-gray-500 dark:text-gray-400 py-1">
      {$i18n.t('No feedbacks found')}
    </div>
  {:else}
    <table
      class="w-full text-sm text-left text-gray-500 dark:text-gray-400 table-auto max-w-full rounded"
    >
      <thead
        class="text-xs text-gray-700 uppercase bg-gray-50 dark:bg-gray-850 dark:text-gray-400 -translate-y-0.5"
      >
        <tr class="">
          <th scope="col" class="px-3 text-right cursor-pointer select-none w-0">
            {$i18n.t('User')}
          </th>
          <th scope="col" class="px-3 pr-1.5 cursor-pointer select-none">
            {$i18n.t('Models')}
          </th>
          <th scope="col" class="px-3 py-1.5 text-right cursor-pointer select-none w-fit">
            {$i18n.t('Result')}
          </th>
          <th scope="col" class="px-3 py-1.5 text-right cursor-pointer select-none w-0">
            {$i18n.t('Updated At')}
          </th>
          <th scope="col" class="px-3 py-1.5 text-right cursor-pointer select-none w-0"> </th>
        </tr>
      </thead>
      <tbody class="">
        {#each paginatedFeedbacks as feedback (feedback.id)}
          <tr class="bg-white dark:bg-gray-900 dark:border-gray-850 text-xs">
            <td class=" py-0.5 text-right font-semibold">
              <div class="flex justify-center">
                <Tooltip content={feedback?.user?.name}>
                  <div class="flex-shrink-0">
                    <img
                      src={feedback?.user?.profile_image_url ?? '/user.png'}
                      alt={feedback?.user?.name}
                      class="size-5 rounded-full object-cover shrink-0"
                    />
                  </div>
                </Tooltip>
              </div>
            </td>
            <td class=" py-1 pl-3 flex flex-col">
              <div class="flex flex-col items-start gap-0.5 h-full">
                <div class="flex flex-col h-full">
                  {#if feedback.data?.sibling_model_ids}
                    <div class="font-semibold text-gray-600 dark:text-gray-400 flex-1">
                      {feedback.data?.model_id}
                    </div>
                    <!-- At most two sibling ids inline; the tooltip lists all of them -->
                    <Tooltip content={feedback.data.sibling_model_ids.join(', ')}>
                      <div class=" text-[0.65rem] text-gray-600 dark:text-gray-400 line-clamp-1">
                        {#if feedback.data.sibling_model_ids.length > 2}
                          <!-- {$i18n.t('and {{COUNT}} more')} -->
                          {feedback.data.sibling_model_ids.slice(0, 2).join(', ')}, {$i18n.t(
                            'and {{COUNT}} more',
                            { COUNT: feedback.data.sibling_model_ids.length - 2 }
                          )}
                        {:else}
                          {feedback.data.sibling_model_ids.join(', ')}
                        {/if}
                      </div>
                    </Tooltip>
                  {:else}
                    <div
                      class=" text-sm font-medium text-gray-600 dark:text-gray-400 flex-1 py-1.5"
                    >
                      {feedback.data?.model_id}
                    </div>
                  {/if}
                </div>
              </div>
            </td>
            <td class="px-3 py-1 text-right font-medium text-gray-900 dark:text-white w-max">
              <div class=" flex justify-end">
                {#if feedback.data.rating.toString() === '1'}
                  <Badge type="info" content={$i18n.t('Won')} />
                {:else if feedback.data.rating.toString() === '0'}
                  <Badge type="muted" content={$i18n.t('Draw')} />
                {:else if feedback.data.rating.toString() === '-1'}
                  <Badge type="error" content={$i18n.t('Lost')} />
                {/if}
              </div>
            </td>
            <td class=" px-3 py-1 text-right font-medium">
              {dayjs(feedback.updated_at * 1000).fromNow()}
            </td>
            <td class=" px-3 py-1 text-right font-semibold">
              <FeedbackMenu
                on:delete={() => {
                  deleteFeedbackHandler(feedback.id);
                }}
              >
                <button
                  class="self-center w-fit text-sm p-1.5 dark:text-gray-300 dark:hover:text-white hover:bg-black/5 dark:hover:bg-white/5 rounded-xl"
                >
                  <EllipsisHorizontal />
                </button>
              </FeedbackMenu>
            </td>
          </tr>
        {/each}
      </tbody>
    </table>
  {/if}
</div>
<!-- Community sharing call-to-action, shown only when there is feedback to share -->
{#if feedbacks.length > 0}
  <div class=" flex flex-col justify-end w-full text-right gap-1">
    <div class="line-clamp-1 text-gray-500 text-xs">
      {$i18n.t('Help us create the best community leaderboard by sharing your feedback history!')}
    </div>
    <div class="flex space-x-1 ml-auto">
      <Tooltip
        content={$i18n.t(
          'To protect your privacy, only ratings, model IDs, tags, and metadata are shared from your feedback—your chat logs remain private and are not included.'
        )}
      >
        <button
          class="flex text-xs items-center px-3 py-1.5 rounded-xl bg-gray-50 hover:bg-gray-100 dark:bg-gray-850 dark:hover:bg-gray-800 dark:text-gray-200 transition"
          on:click={() => {
            shareHandler();
          }}
        >
          <div class=" self-center mr-2 font-medium line-clamp-1">
            {$i18n.t('Share to OpenWebUI Community')}
          </div>
          <div class=" self-center">
            <CloudArrowUp className="size-3" strokeWidth="3" />
          </div>
        </button>
      </Tooltip>
    </div>
  </div>
{/if}
<!-- Pagination appears once there is more than one page of 10 feedbacks -->
{#if feedbacks.length > 10}
  <Pagination bind:page count={feedbacks.length} perPage={10} />
{/if}
<div class="pb-12"></div>
{/if} | |