elliesleightholm committed on
Commit
8b7c6a9
·
verified ·
1 Parent(s): ae4d4ac

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -4
app.py CHANGED
@@ -1,9 +1,9 @@
1
  import gradio as gr
2
  import pandas as pd
3
 
4
- # --------------- 100k-Marqo-Ecommerce-Easy -------------------
5
 
6
- # Embedding Models for 100k-Marqo-Ecommerce-Easy
7
  embedding_models = [
8
  '<a href="https://docs.aws.amazon.com/bedrock/latest/userguide/titan-multiemb-models.html">Amazon-Titan-MultiModal</a>',
9
  '<a href="https://huggingface.co/jinaai/jina-clip-v1">Jina-V1-CLIP</a>',
@@ -140,7 +140,7 @@ with gr.Blocks(css="""
140
  gr.Markdown("# Ecommerce Embedding Model Benchmarks")
141
 
142
  gr.Markdown("This Space contains benchmark results conducted as part of the release of our ecommerce embedding models: [**`Marqo-Ecommerce-L`**](https://huggingface.co/Marqo/marqo-ecommerce-embeddings-L) and [**`Marqo-Ecommerce-B`**](https://huggingface.co/Marqo/marqo-ecommerce-embeddings-B). ")
143
- gr.Markdown('The benchmarks are separated into \'Marqo-Ecommerce-Hard\' and \'100k-Marqo-Ecommerce-Easy\'. The "easy" dataset is about 10-30 times smaller, and designed to accommodate rate-limited models, specifically Cohere-Embeddings-v3 and GCP-Vertex. The "hard" dataset represents the true challenge, since it contains four million ecommerce product listings, which pushes these models to their limits in a real-world, ecommerce scenario. ' )
144
  gr.Markdown('Within both these scenarios, the models were benchmarked against three different tasks:')
145
  gr.Markdown('- **Google Shopping Text-to-Image**')
146
  gr.Markdown('- **Google Shopping Category-to-Image**')
@@ -160,7 +160,7 @@ with gr.Blocks(css="""
160
  gr.Dataframe(value=hard_ap_text2_image_3m_df, headers="keys", interactive=True, datatype=["html", "number", "number", "number", "number"])
161
 
162
  # Easy
163
- gr.Markdown('## 100k-Marqo-Ecommerce-Easy')
164
  gr.Markdown('### Google Shopping Text to Image')
165
  gr.Dataframe(value=gs_text2_image_1m_df, headers="keys", interactive=True, datatype=["html", "number", "number", "number", "number"])
166
 
 
1
  import gradio as gr
2
  import pandas as pd
3
 
4
+ # --------------- Marqo-Ecommerce-Easy -------------------
5
 
6
+ # Embedding Models for Marqo-Ecommerce-Easy
7
  embedding_models = [
8
  '<a href="https://docs.aws.amazon.com/bedrock/latest/userguide/titan-multiemb-models.html">Amazon-Titan-MultiModal</a>',
9
  '<a href="https://huggingface.co/jinaai/jina-clip-v1">Jina-V1-CLIP</a>',
 
140
  gr.Markdown("# Ecommerce Embedding Model Benchmarks")
141
 
142
  gr.Markdown("This Space contains benchmark results conducted as part of the release of our ecommerce embedding models: [**`Marqo-Ecommerce-L`**](https://huggingface.co/Marqo/marqo-ecommerce-embeddings-L) and [**`Marqo-Ecommerce-B`**](https://huggingface.co/Marqo/marqo-ecommerce-embeddings-B). ")
143
+ gr.Markdown("Our benchmarking process was divided into two distinct regimes, each using different datasets of ecommerce product listings: **marqo-ecommerce-hard** and **marqo-ecommerce-easy**. Both datasets contained product images and text and only differed in size. The 'easy' dataset is approximately 10-30 times smaller (200k vs 4M products), and designed to accommodate rate-limited models, specifically Cohere-Embeddings-v3 and GCP-Vertex (with limits of 0.66 rps and 2 rps respectively). The 'hard' dataset represents the true challenge, since it contains four million ecommerce product listings and is more representative of real-world ecommerce search scenarios.")
144
  gr.Markdown('Within both these scenarios, the models were benchmarked against three different tasks:')
145
  gr.Markdown('- **Google Shopping Text-to-Image**')
146
  gr.Markdown('- **Google Shopping Category-to-Image**')
 
160
  gr.Dataframe(value=hard_ap_text2_image_3m_df, headers="keys", interactive=True, datatype=["html", "number", "number", "number", "number"])
161
 
162
  # Easy
163
+ gr.Markdown('## Marqo-Ecommerce-Easy')
164
  gr.Markdown('### Google Shopping Text to Image')
165
  gr.Dataframe(value=gs_text2_image_1m_df, headers="keys", interactive=True, datatype=["html", "number", "number", "number", "number"])
166