File size: 5,623 Bytes
66b4965 e016664 8cb674a e016664 66b4965 8cb674a 3607302 4a8a2ad 3607302 c4e2aeb b5ad3db 66b4965 e016664 c4e2aeb cf39065 c4e2aeb 17bdd91 c4e2aeb 17bdd91 c4e2aeb 8cb674a 17bdd91 8cb674a 17bdd91 8cb674a 18566fa 17bdd91 f53967f 18566fa 17bdd91 bf23c7b 17bdd91 3435c28 18566fa c4e2aeb 66b4965 e016664 66b4965 c4e2aeb 66b4965 e016664 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 |
import gradio as gr
import pandas as pd
from css_html_js import custom_css
# Page heading, passed to gr.HTML below.
# The Malaysian flag (regional indicators M + Y) is written with \U escapes:
# the literal emoji had been mis-decoded into "π²πΎ", and escapes keep the
# source ASCII-safe so that cannot happen again.
TITLE = (
    '<h1 align="center" id="space-title">'
    "\U0001F1F2\U0001F1FE Malaysian RAG Embedding Leaderboard</h1>"
)
# Markdown introduction rendered under the title.
# The Malaysian flag (regional indicators M + Y) is written with \U escapes to
# undo the "π²πΎ" mojibake that the literal emoji had decayed into.
# NOTE(review): the lone "π" opening two of the lines below looks like another
# emoji whose trailing bytes were lost; the intended glyph cannot be recovered
# from this file alone — restore it from the upstream repository.
INTRODUCTION_TEXT = """
π The \U0001F1F2\U0001F1FE Malaysian RAG Embedding Leaderboard aims to track, rank and evaluate Top-k retrieval using embedding models. All notebooks at https://github.com/mesolitica/embedding-benchmarks, feel free to submit your own score at https://huggingface.co/spaces/mesolitica/Malaysian-Embedding-Leaderboard/discussions with link to the notebook.
## Dataset
π We evaluate models based on 2 datasets,
1. Research paper keyword `melayu` using Crossref, https://huggingface.co/datasets/mesolitica/malaysian-ultrachat/resolve/main/ultrachat-crossref-melayu-malay.jsonl
2. lom.agc.gov.my PDF files, https://huggingface.co/datasets/mesolitica/malaysian-ultrachat/resolve/main/ultrachat-lom-agc.jsonl
"""
# Leaderboard rows for closed-source models. Each dict is one table row: the
# "model" label plus top-1/3/5/10 scores for the two datasets.
close_source = [
    {
        "model": "OpenAI ADA-002",
        "Crossref Melayu top-1": 0.3155939351340496,
        "Crossref Melayu top-3": 0.5120996083944171,
        "Crossref Melayu top-5": 0.5878100210864544,
        "Crossref Melayu top-10": 0.6721558389396526,
        "lom.agc.gov.my top-1": 0.19168533731640527,
        "lom.agc.gov.my top-3": 0.2827981080408265,
        "lom.agc.gov.my top-5": 0.322504356484939,
        "lom.agc.gov.my top-10": 0.36855862584017923,
    },
]
# Leaderboard rows for open-source models. Each dict is one table row: a
# markdown link as the "model" label plus top-1/3/5/10 scores per dataset.
# NOTE(review): the three llama2 "-contrastive" rows link to URLs without the
# "-contrastive" suffix (the 600m and 1b ones reuse the URLs of the first two
# rows), whereas the mistral rows link to the suffixed name — confirm the
# intended targets.
open_source = [
    {
        "model": "[mesolitica/llama2-embedding-600m-8k](https://huggingface.co/mesolitica/llama2-embedding-600m-8k)",
        "Crossref Melayu top-1": 0.09549151521237072,
        "Crossref Melayu top-3": 0.1834521538307059,
        "Crossref Melayu top-5": 0.23375840947886334,
        "Crossref Melayu top-10": 0.3098704689225826,
        "lom.agc.gov.my top-1": 0.05215334826985312,
        "lom.agc.gov.my top-3": 0.09932785660941,
        "lom.agc.gov.my top-5": 0.12969878018421707,
        "lom.agc.gov.my top-10": 0.1797361214836943,
    },
    {
        "model": "[mesolitica/llama2-embedding-1b-8k](https://huggingface.co/mesolitica/llama2-embedding-1b-8k)",
        "Crossref Melayu top-1": 0.06777788934631991,
        "Crossref Melayu top-3": 0.142584596847073,
        "Crossref Melayu top-5": 0.18817150316296816,
        "Crossref Melayu top-10": 0.25715433276433375,
        "lom.agc.gov.my top-1": 0.06870799103808813,
        "lom.agc.gov.my top-3": 0.1343042071197411,
        "lom.agc.gov.my top-5": 0.1717699775952203,
        "lom.agc.gov.my top-10": 0.23089370176748816,
    },
    {
        "model": "[mesolitica/mistral-embedding-191m-8k-contrastive](https://huggingface.co/mesolitica/mistral-embedding-191m-8k-contrastive)",
        "Crossref Melayu top-1": 0.08001654088700506,
        "Crossref Melayu top-3": 0.17378269409697095,
        "Crossref Melayu top-5": 0.232192701333609,
        "Crossref Melayu top-10": 0.32482166856197664,
        "lom.agc.gov.my top-1": 0.041075429424943986,
        "lom.agc.gov.my top-3": 0.09148618371919343,
        "lom.agc.gov.my top-5": 0.12758277321384118,
        "lom.agc.gov.my top-10": 0.18707991038088126,
    },
    {
        "model": "[mesolitica/mistral-embedding-349m-8k-contrastive](https://huggingface.co/mesolitica/mistral-embedding-349m-8k-contrastive)",
        "Crossref Melayu top-1": 0.09045797580895276,
        "Crossref Melayu top-3": 0.18742892587615012,
        "Crossref Melayu top-5": 0.2444949860436266,
        "Crossref Melayu top-10": 0.3398118474103174,
        "lom.agc.gov.my top-1": 0.039581777445855115,
        "lom.agc.gov.my top-3": 0.08849887976101568,
        "lom.agc.gov.my top-5": 0.12335075927308937,
        "lom.agc.gov.my top-10": 0.18558625840179238,
    },
    {
        "model": "[mesolitica/llama2-embedding-600m-8k-contrastive](https://huggingface.co/mesolitica/llama2-embedding-600m-8k)",
        "Crossref Melayu top-1": 0.1260208828698439,
        "Crossref Melayu top-3": 0.2649643337123953,
        "Crossref Melayu top-5": 0.3535614597332782,
        "Crossref Melayu top-10": 0.4751369792205107,
        "lom.agc.gov.my top-1": 0.0660941000746826,
        "lom.agc.gov.my top-3": 0.13754045307443366,
        "lom.agc.gov.my top-5": 0.18869803335822755,
        "lom.agc.gov.my top-10": 0.2745830221558377,
    },
    {
        "model": "[mesolitica/llama2-embedding-1b-8k-contrastive](https://huggingface.co/mesolitica/llama2-embedding-1b-8k)",
        "Crossref Melayu top-1": 0.21958027499224647,
        "Crossref Melayu top-3": 0.42809883179985525,
        "Crossref Melayu top-5": 0.5379923498397602,
        "Crossref Melayu top-10": 0.6708363485991936,
        "lom.agc.gov.my top-1": 0.11899427433408016,
        "lom.agc.gov.my top-3": 0.239731142643764,
        "lom.agc.gov.my top-5": 0.3094349016679114,
        "lom.agc.gov.my top-10": 0.4134926562111028,
    },
    {
        "model": "[mesolitica/llama2-embedding-2b-8k-contrastive](https://huggingface.co/mesolitica/llama2-embedding-2b-8k)",
        "Crossref Melayu top-1": 0.2603122092422206,
        "Crossref Melayu top-3": 0.4910575829628864,
        "Crossref Melayu top-5": 0.6041558978600228,
        "Crossref Melayu top-10": 0.7295564974671767,
        "lom.agc.gov.my top-1": 0.15011202389843167,
        "lom.agc.gov.my top-3": 0.2803086880756784,
        "lom.agc.gov.my top-5": 0.35760517799352753,
        "lom.agc.gov.my top-10": 0.466268359472243,
    },
]
# One table for the whole leaderboard: closed-source rows first, then the
# open-source rows, in the order they are listed above.
data = pd.DataFrame(close_source + open_source)

# Build the page: heading, introduction, then the score table.
with gr.Blocks(css=custom_css) as demo:
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
    # "markdown" datatype because the "model" cells hold [name](url) links.
    gr.DataFrame(data, datatype="markdown")

# Start the Gradio server for this app.
demo.launch()