machineuser committed on
Commit
9ce2247
1 Parent(s): 30c18c2

Sync widgets demo

Browse files
packages/tasks/src/tasks/depth-estimation/data.ts CHANGED
@@ -24,14 +24,16 @@ const taskData: TaskDataCustom = {
24
  metrics: [],
25
  models: [
26
  {
27
- // TO DO: write description
28
  description: "Strong Depth Estimation model trained on 1.4 million images.",
29
  id: "Intel/dpt-large",
30
  },
31
  {
32
- // TO DO: write description
33
  description: "Strong Depth Estimation model trained on the KITTI dataset.",
34
- id: "vinvino02/glpn-kitti",
 
 
 
 
35
  },
36
  ],
37
  spaces: [
 
24
  metrics: [],
25
  models: [
26
  {
 
27
  description: "Strong Depth Estimation model trained on 1.4 million images.",
28
  id: "Intel/dpt-large",
29
  },
30
  {
 
31
  description: "Strong Depth Estimation model trained on the KITTI dataset.",
32
+ id: "facebook/dpt-dinov2-large-kitti",
33
+ },
34
+ {
35
+ description: "A strong monocular depth estimation model.",
36
+ id: "Bingxin/Marigold",
37
  },
38
  ],
39
  spaces: [
packages/tasks/src/tasks/document-question-answering/data.ts CHANGED
@@ -50,6 +50,10 @@ const taskData: TaskDataCustom = {
50
  description: "A special model for OCR-free Document QA task. Donut model fine-tuned on DocVQA.",
51
  id: "naver-clova-ix/donut-base-finetuned-docvqa",
52
  },
 
 
 
 
53
  ],
54
  spaces: [
55
  {
@@ -60,6 +64,10 @@ const taskData: TaskDataCustom = {
60
  description: "An application that can answer questions from invoices.",
61
  id: "impira/invoices",
62
  },
 
 
 
 
63
  ],
64
  summary:
65
  "Document Question Answering (also known as Document Visual Question Answering) is the task of answering questions on document images. Document question answering models take a (document, question) pair as input and return an answer in natural language. Models usually rely on multi-modal features, combining text, position of words (bounding-boxes) and image.",
 
50
  description: "A special model for OCR-free Document QA task. Donut model fine-tuned on DocVQA.",
51
  id: "naver-clova-ix/donut-base-finetuned-docvqa",
52
  },
53
+ {
54
+ description: "A powerful model for document question answering.",
55
+ id: "google/pix2struct-docvqa-large",
56
+ },
57
  ],
58
  spaces: [
59
  {
 
64
  description: "An application that can answer questions from invoices.",
65
  id: "impira/invoices",
66
  },
67
+ {
68
+ description: "An application to compare different document question answering models.",
69
+ id: "merve/compare_docvqa_models",
70
+ },
71
  ],
72
  summary:
73
  "Document Question Answering (also known as Document Visual Question Answering) is the task of answering questions on document images. Document question answering models take a (document, question) pair as input and return an answer in natural language. Models usually rely on multi-modal features, combining text, position of words (bounding-boxes) and image.",
packages/tasks/src/tasks/image-to-text/data.ts CHANGED
@@ -32,30 +32,22 @@ const taskData: TaskDataCustom = {
32
  models: [
33
  {
34
  description: "A robust image captioning model.",
35
- id: "Salesforce/blip-image-captioning-large",
36
  },
37
  {
38
- description: "A strong image captioning model.",
39
- id: "nlpconnect/vit-gpt2-image-captioning",
40
  },
41
  {
42
  description: "A strong optical character recognition model.",
43
- id: "microsoft/trocr-base-printed",
44
- },
45
- {
46
- description: "A strong visual question answering model for scientific diagrams.",
47
- id: "google/pix2struct-ai2d-base",
48
- },
49
- {
50
- description: "A strong captioning model for UI components.",
51
- id: "google/pix2struct-widget-captioning-base",
52
- },
53
- {
54
- description: "A captioning model for images that contain text.",
55
- id: "google/pix2struct-textcaps-base",
56
  },
57
  ],
58
  spaces: [
 
 
 
 
59
  {
60
  description: "A robust image captioning application.",
61
  id: "flax-community/image-captioning",
 
32
  models: [
33
  {
34
  description: "A robust image captioning model.",
35
+ id: "Salesforce/blip2-opt-2.7b",
36
  },
37
  {
38
+ description: "A powerful and accurate image-to-text model that can also localize concepts in images.",
39
+ id: "microsoft/kosmos-2-patch14-224",
40
  },
41
  {
42
  description: "A strong optical character recognition model.",
43
+ id: "facebook/nougat-base",
 
 
 
 
 
 
 
 
 
 
 
 
44
  },
45
  ],
46
  spaces: [
47
+ {
48
+ description: "An application that compares various image captioning models.",
49
+ id: "nielsr/comparing-captioning-models",
50
+ },
51
  {
52
  description: "A robust image captioning application.",
53
  id: "flax-community/image-captioning",
packages/tasks/src/tasks/object-detection/data.ts CHANGED
@@ -40,7 +40,6 @@ const taskData: TaskDataCustom = {
40
  ],
41
  models: [
42
  {
43
- // TO DO: write description
44
  description: "Solid object detection model trained on the benchmark dataset COCO 2017.",
45
  id: "facebook/detr-resnet-50",
46
  },
@@ -50,9 +49,13 @@ const taskData: TaskDataCustom = {
50
  },
51
  ],
52
  spaces: [
 
 
 
 
53
  {
54
  description: "An object detection application that can detect unseen objects out of the box.",
55
- id: "adirik/OWL-ViT",
56
  },
57
  {
58
  description: "An application that contains various object detection models to try from.",
 
40
  ],
41
  models: [
42
  {
 
43
  description: "Solid object detection model trained on the benchmark dataset COCO 2017.",
44
  id: "facebook/detr-resnet-50",
45
  },
 
49
  },
50
  ],
51
  spaces: [
52
+ {
53
+ description: "Leaderboard to compare various object detection models across several metrics.",
54
+ id: "hf-vision/object_detection_leaderboard",
55
+ },
56
  {
57
  description: "An object detection application that can detect unseen objects out of the box.",
58
+ id: "merve/owlv2",
59
  },
60
  {
61
  description: "An application that contains various object detection models to try from.",
packages/tasks/src/tasks/text-to-image/data.ts CHANGED
@@ -45,14 +45,12 @@ const taskData: TaskDataCustom = {
45
  ],
46
  models: [
47
  {
48
- description:
49
- "A latent text-to-image diffusion model capable of generating photo-realistic images given any text input.",
50
- id: "CompVis/stable-diffusion-v1-4",
51
  },
52
  {
53
- description:
54
- "A model that can be used to generate images based on text prompts. The DALL·E Mega model is the largest version of DALLE Mini.",
55
- id: "dalle-mini/dalle-mega",
56
  },
57
  {
58
  description: "A text-to-image model that can generate coherent text inside image.",
@@ -69,19 +67,23 @@ const taskData: TaskDataCustom = {
69
  id: "stabilityai/stable-diffusion",
70
  },
71
  {
72
- description: "An text-to-image application that can generate coherent text inside the image.",
 
 
 
 
73
  id: "DeepFloyd/IF",
74
  },
75
  {
76
- description: "An powerful text-to-image application that can generate images.",
77
- id: "kakaobrain/karlo",
78
  },
79
  {
80
- description: "An powerful text-to-image application that can generates 3D representations.",
81
  id: "hysts/Shap-E",
82
  },
83
  {
84
- description: "A strong application for `text-to-image`, `image-to-image` and image inpainting.",
85
  id: "ArtGAN/Stable-Diffusion-ControlNet-WebUI",
86
  },
87
  ],
 
45
  ],
46
  models: [
47
  {
48
+ description: "One of the most powerful image generation models that can generate realistic outputs.",
49
+ id: "stabilityai/stable-diffusion-xl-base-1.0",
 
50
  },
51
  {
52
+ description: "A powerful yet fast image generation model.",
53
+ id: "latent-consistency/lcm-lora-sdxl",
 
54
  },
55
  {
56
  description: "A text-to-image model that can generate coherent text inside image.",
 
67
  id: "stabilityai/stable-diffusion",
68
  },
69
  {
70
+ description: "A text-to-image application to generate comics.",
71
+ id: "jbilcke-hf/ai-comic-factory",
72
+ },
73
+ {
74
+ description: "A text-to-image application that can generate coherent text inside the image.",
75
  id: "DeepFloyd/IF",
76
  },
77
  {
78
+ description: "A powerful yet very fast image generation application.",
79
+ id: "latent-consistency/lcm-lora-for-sdxl",
80
  },
81
  {
82
+ description: "A powerful text-to-image application that can generate 3D representations.",
83
  id: "hysts/Shap-E",
84
  },
85
  {
86
+ description: "An application for `text-to-image`, `image-to-image` and image inpainting.",
87
  id: "ArtGAN/Stable-Diffusion-ControlNet-WebUI",
88
  },
89
  ],
packages/tasks/src/tasks/text-to-video/data.ts CHANGED
@@ -68,7 +68,7 @@ const taskData: TaskDataCustom = {
68
  models: [
69
  {
70
  description: "A strong model for video generation.",
71
- id: "PAIR/text2video-zero-controlnet-canny-arcane",
72
  },
73
  {
74
  description: "A robust model for text-to-video generation.",
@@ -76,7 +76,7 @@ const taskData: TaskDataCustom = {
76
  },
77
  {
78
  description: "A text-to-video generation model with high quality and smooth outputs.",
79
- id: "cerspense/zeroscope_v2_576w",
80
  },
81
  ],
82
  spaces: [
@@ -86,7 +86,7 @@ const taskData: TaskDataCustom = {
86
  },
87
  {
88
  description: "An application that generates video from image and text.",
89
- id: "TempoFunk/makeavid-sd-jax",
90
  },
91
  {
92
  description: "An application that generates videos from text and provides multi-model support.",
 
68
  models: [
69
  {
70
  description: "A strong model for video generation.",
71
+ id: "Vchitect/LaVie",
72
  },
73
  {
74
  description: "A robust model for text-to-video generation.",
 
76
  },
77
  {
78
  description: "A text-to-video generation model with high quality and smooth outputs.",
79
+ id: "hotshotco/Hotshot-XL",
80
  },
81
  ],
82
  spaces: [
 
86
  },
87
  {
88
  description: "An application that generates video from image and text.",
89
+ id: "Vchitect/LaVie",
90
  },
91
  {
92
  description: "An application that generates videos from text and provides multi-model support.",
packages/tasks/src/tasks/visual-question-answering/data.ts CHANGED
@@ -71,6 +71,10 @@ const taskData: TaskDataCustom = {
71
  },
72
  ],
73
  spaces: [
 
 
 
 
74
  {
75
  description: "An application that can answer questions based on images.",
76
  id: "nielsr/vilt-vqa",
 
71
  },
72
  ],
73
  spaces: [
74
+ {
75
+ description: "An application that compares visual question answering models across different tasks.",
76
+ id: "merve/pix2struct",
77
+ },
78
  {
79
  description: "An application that can answer questions based on images.",
80
  id: "nielsr/vilt-vqa",