File size: 4,998 Bytes
66b4965 e016664 8cb674a e016664 66b4965 8cb674a 3607302 4a8a2ad 3607302 c4e2aeb 3607302 66b4965 e016664 c4e2aeb cf39065 c4e2aeb 8cb674a 18566fa 8cb674a 18566fa 8cb674a 18566fa c4e2aeb 66b4965 e016664 66b4965 c4e2aeb 66b4965 e016664 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 |
import gradio as gr
import pandas as pd
from css_html_js import custom_css
# Page heading, rendered as raw HTML by gr.HTML below.
# The flag emoji had been stored as mis-decoded bytes ("π²πΎ" is what the
# UTF-8 encoding of the Malaysian flag looks like when read as ISO-8859-7);
# it is restored to the real emoji here and in INTRODUCTION_TEXT.
TITLE = """<h1 align="center" id="space-title">🇲🇾 Malaysian RAG Embedding Leaderboard</h1>"""

# Markdown introduction shown under the heading.
# NOTE(review): the lone "π" that opens two of the lines below looks like the
# remnant of another mis-decoded emoji. Its trailing bytes are gone, so the
# original character cannot be recovered from this file; left unchanged.
# NOTE(review): dataset 2 is described as "Epenerbitan", but the score columns
# in the tables are named 'lom.agc.gov.my' -- confirm which one is correct.
INTRODUCTION_TEXT = """
π The 🇲🇾 Malaysian RAG Embedding Leaderboard aims to track, rank and evaluate Top-k retrieval using embedding models. All notebooks at https://github.com/mesolitica/embedding-benchmarks, feel free to submit your own score at https://huggingface.co/spaces/mesolitica/Malaysian-Embedding-Leaderboard/discussions with link to the notebook.
## Dataset
π We evaluate models based on 2 datasets,
1. Research paper keyword `melayu` using Crossref, https://huggingface.co/datasets/mesolitica/malaysian-ultrachat/resolve/main/ultrachat-crossref-melayu-malay.jsonl
2. Epenerbitan, https://huggingface.co/datasets/mesolitica/malaysian-ultrachat/resolve/main/ultrachat-epenerbitan-malay.jsonl
"""
# Closed-source baseline rows for the leaderboard table.
# Each row carries a display name under "model" plus one score per
# "<dataset> top-k" column (k = 1, 3, 5, 10 for each of the two datasets).
close_source = [
    {
        "model": "OpenAI ADA-002",
        # Crossref Melayu
        "Crossref Melayu top-1": 0.3155939351340496,
        "Crossref Melayu top-3": 0.5120996083944171,
        "Crossref Melayu top-5": 0.5878100210864544,
        "Crossref Melayu top-10": 0.6721558389396526,
        # lom.agc.gov.my
        "lom.agc.gov.my top-1": 0.19168533731640527,
        "lom.agc.gov.my top-3": 0.2827981080408265,
        "lom.agc.gov.my top-5": 0.322504356484939,
        "lom.agc.gov.my top-10": 0.36855862584017923,
    },
]
# Open-source model rows for the leaderboard table.
# "model" is a Markdown link "[label](url)"; the remaining keys hold one score
# per "<dataset> top-k" column, using the same column names as close_source.
#
# The two llama2 "-contrastive" rows previously linked to the non-contrastive
# repositories (the same URLs as the first two rows). Every other row links to
# the repository named by its label, so those two URLs now follow that pattern.
# NOTE(review): confirm both "-contrastive" repositories exist on the Hub.
open_source = [
    {
        'model': '[llama2-embedding-600m-8k](https://huggingface.co/mesolitica/llama2-embedding-600m-8k)',
        'Crossref Melayu top-1': 0.09549151521237072,
        'Crossref Melayu top-3': 0.1834521538307059,
        'Crossref Melayu top-5': 0.23375840947886334,
        'Crossref Melayu top-10': 0.3098704689225826,
        'lom.agc.gov.my top-1': 0.05215334826985312,
        'lom.agc.gov.my top-3': 0.09932785660941,
        'lom.agc.gov.my top-5': 0.12969878018421707,
        'lom.agc.gov.my top-10': 0.1797361214836943,
    },
    {
        'model': '[llama2-embedding-1b-8k](https://huggingface.co/mesolitica/llama2-embedding-1b-8k)',
        'Crossref Melayu top-1': 0.06777788934631991,
        'Crossref Melayu top-3': 0.142584596847073,
        'Crossref Melayu top-5': 0.18817150316296816,
        'Crossref Melayu top-10': 0.25715433276433375,
        'lom.agc.gov.my top-1': 0.06870799103808813,
        'lom.agc.gov.my top-3': 0.1343042071197411,
        'lom.agc.gov.my top-5': 0.1717699775952203,
        'lom.agc.gov.my top-10': 0.23089370176748816,
    },
    {
        'model': '[mistral-embedding-191m-8k-contrastive](https://huggingface.co/mesolitica/mistral-embedding-191m-8k-contrastive)',
        'Crossref Melayu top-1': 0.08001654088700506,
        'Crossref Melayu top-3': 0.17378269409697095,
        'Crossref Melayu top-5': 0.232192701333609,
        'Crossref Melayu top-10': 0.32482166856197664,
        'lom.agc.gov.my top-1': 0.041075429424943986,
        'lom.agc.gov.my top-3': 0.09148618371919343,
        'lom.agc.gov.my top-5': 0.12758277321384118,
        'lom.agc.gov.my top-10': 0.18707991038088126,
    },
    {
        'model': '[mistral-embedding-349m-8k-contrastive](https://huggingface.co/mesolitica/mistral-embedding-349m-8k-contrastive)',
        'Crossref Melayu top-1': 0.09045797580895276,
        'Crossref Melayu top-3': 0.18742892587615012,
        'Crossref Melayu top-5': 0.2444949860436266,
        'Crossref Melayu top-10': 0.3398118474103174,
        'lom.agc.gov.my top-1': 0.039581777445855115,
        'lom.agc.gov.my top-3': 0.08849887976101568,
        'lom.agc.gov.my top-5': 0.12335075927308937,
        'lom.agc.gov.my top-10': 0.18558625840179238,
    },
    {
        # Link corrected to the "-contrastive" repository named by the label.
        'model': '[llama2-embedding-600m-8k-contrastive](https://huggingface.co/mesolitica/llama2-embedding-600m-8k-contrastive)',
        'Crossref Melayu top-1': 0.11516592577276956,
        'Crossref Melayu top-3': 0.2412901891863951,
        'Crossref Melayu top-5': 0.3201695440918019,
        'Crossref Melayu top-10': 0.4337847617078466,
        'lom.agc.gov.my top-1': 0.05738113019666418,
        'lom.agc.gov.my top-3': 0.12571570823998007,
        'lom.agc.gov.my top-5': 0.16455065969629076,
        'lom.agc.gov.my top-10': 0.24446104057754542,
    },
    {
        # Link corrected to the "-contrastive" repository named by the label.
        'model': '[llama2-embedding-1b-8k-contrastive](https://huggingface.co/mesolitica/llama2-embedding-1b-8k-contrastive)',
        'Crossref Melayu top-1': 0.16489196733174816,
        'Crossref Melayu top-3': 0.3281298459629898,
        'Crossref Melayu top-5': 0.42623798201178537,
        'Crossref Melayu top-10': 0.5487439263930528,
        'lom.agc.gov.my top-1': 0.09123724172267862,
        'lom.agc.gov.my top-3': 0.18309683843664426,
        'lom.agc.gov.my top-5': 0.24271844660194175,
        'lom.agc.gov.my top-10': 0.3354493403037092,
    },
]
# Combine both row lists into a single table; the closed-source rows come
# first, followed by the open-source rows in their listed order.
data = pd.DataFrame(close_source + open_source)

# Build the page: HTML heading, Markdown introduction, then the score table.
demo = gr.Blocks(css=custom_css)
with demo:
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
    # Cells are declared as 'markdown' so the "[label](url)" strings in the
    # model column are treated as Markdown rather than plain text.
    gr.DataFrame(data, datatype='markdown')

# Start the app as soon as the script runs (the statement was followed by a
# stray "|" token, which has been dropped).
demo.launch()