Spaces:

Demo750
/

XGBoost_Gaze

Running

App Files Community

Demo750 committed on Oct 24, 2024

Commit

39ae7e7

verified ·

1 Parent(s): 9e8e76b

Update Webpage.py

Browse files

Make it more concise

Files changed (1) hide show

Webpage.py +40 -26

Webpage.py CHANGED Viewed

@@ -10,6 +10,9 @@ CATEGORIES = list(GENERAL_CATEGORY.keys())
 CATEGORIES.sort()
 def calculate_areas(prompts, brand_num, pictorial_num, text_num):
     points_all = prompts["points"]
     brand_surf = 0
     for i in range(brand_num):
@@ -33,29 +36,26 @@ def calculate_areas(prompts, brand_num, pictorial_num, text_num):
     x1 = points_all[-2][0]; y1 = points_all[-2][1]
     x2 = points_all[-2][3]; y2 = points_all[-2][4]
     ad_size += np.abs((x1-x2)*(y1-y2))
     whole_size = 0
-    x1 = points_all[-1][0]; y1 = points_all[-1][1]
-    x2 = points_all[-1][3]; y2 = points_all[-1][4]
-    whole_size += np.abs((x1-x2)*(y1-y2))
-    return (brand_surf/whole_size*100, pictorial_surf/whole_size*100, text_surf/whole_size*100, ad_size/whole_size*100)
-def attention(whole_display_prompt, ad, context,
               brand_num, pictorial_num, text_num,
               category, ad_location, gaze_type):
-    text_detection_model_path = 'EAST-Text-Detection/frozen_east_text_detection.pb'
-    LDA_model_pth = 'LDA_Model_trained/lda_model_best_tot.model'
-    training_ad_text_dictionary_path = 'LDA_Model_trained/object_word_dictionary'
-    training_lang_preposition_path = 'LDA_Model_trained/dutch_preposition'
-    # caption_ad = XGBoost_utils.Caption_Generation(ad)
-    # caption_context = XGBoost_utils.Caption_Generation(context)
-    # ad_topic = XGBoost_utils.Topic_emb(caption_ad)
-    # ctpg_topic = XGBoost_utils.Topic_emb(caption_context)
-    ad_topic = np.random.randn(1,768)
-    ctpg_topic = np.random.randn(1,768)
     prod_group = np.zeros(38)
     prod_group[GENERAL_CATEGORY[category]] = 1
@@ -67,13 +67,18 @@ def attention(whole_display_prompt, ad, context,
     else:
         ad_loc = None
-    brand_percent, visual_percent, text_percent, adv_size_percent = calculate_areas(whole_display_prompt, brand_num, pictorial_num, text_num)
     surfaces = [brand_percent, visual_percent, text_percent, adv_size_percent*10/100]
-    ad = ad.convert('RGB')
-    ad = cv.resize(np.array(ad), (640, 832))
-    context = context.convert('RGB')
-    context = cv.resize(np.array(context), (640, 832))
     Gaze = Predict.Ad_Gaze_Prediction(input_ad_path=ad, input_ctpg_path=context, ad_location=ad_loc,
@@ -89,9 +94,16 @@ def greet(name, intensity):
 demo = gr.Interface(
     fn=attention,
-    inputs=[ImagePrompter(label="Upload Entire (Ad+Context) Image, and Draw Bounding Boxes"),
-            gr.Image(label="Ad Image", sources=['upload', 'webcam'], type="pil"),
-            gr.Image(label="Context Image", sources=['upload', 'webcam'], type="pil"),
             gr.Number(label="Number of brand bounding boxes drawn"),
             gr.Number(label="Number of pictorial bounding boxes drawn"),
             gr.Number(label="Number of text bounding boxes drawn"),
@@ -100,8 +112,10 @@ demo = gr.Interface(
             gr.Textbox(label="Gaze Type", info="Enter Ad or Brand")
             ],
     outputs=[gr.Number(label="Predicted Gaze (sec)")],
-    title="Gaze Prediction",
-    description="In the section right below, please first upload the entire image that contains both ad and context, then draw bounding boxes. Please draw ALL Bounding Boxes in the order of: (1) brand, (2) pictorial, (3) text elements, (4) advertisement and (5) the entire image here. NOTE: Each ad element can have more than 1 boxes.",
     theme=gr.themes.Soft()
 )

 CATEGORIES.sort()
 def calculate_areas(prompts, brand_num, pictorial_num, text_num):
+    image_entire = prompts["image"]
+    w, h = image_entire.size
+    image_entire = np.array(image_entire.convert('RGB'))
     points_all = prompts["points"]
     brand_surf = 0
     for i in range(brand_num):
     x1 = points_all[-2][0]; y1 = points_all[-2][1]
     x2 = points_all[-2][3]; y2 = points_all[-2][4]
     ad_size += np.abs((x1-x2)*(y1-y2))
+    ad_image = image_entire[int(y1):int(y2), int(x1):int(x2), :]
+    left_margin = x1; right_margin = w-x2
+    if left_margin >= right_margin:
+        context_image = image_entire[:, :int(x1), :]
+    else:
+        context_image = image_entire[:, int(x2):, :]
     whole_size = 0
+    whole_size += w*h
+    return (brand_surf/whole_size*100, pictorial_surf/whole_size*100, text_surf/whole_size*100, ad_size/whole_size*100, ad_image, context_image)
+def attention(notes, whole_display_prompt,
               brand_num, pictorial_num, text_num,
               category, ad_location, gaze_type):
+    text_detection_model_path = '../XGBoost_Prediction_Model/EAST-Text-Detection/frozen_east_text_detection.pb'
+    LDA_model_pth = '../XGBoost_Prediction_Model/LDA_Model_trained/lda_model_best_tot.model'
+    training_ad_text_dictionary_path = '../XGBoost_Prediction_Model/LDA_Model_trained/object_word_dictionary'
+    training_lang_preposition_path = '../XGBoost_Prediction_Model/LDA_Model_trained/dutch_preposition'
     prod_group = np.zeros(38)
     prod_group[GENERAL_CATEGORY[category]] = 1
     else:
         ad_loc = None
+    brand_percent, visual_percent, text_percent, adv_size_percent, ad_image, context_image = calculate_areas(whole_display_prompt, brand_num, pictorial_num, text_num)
     surfaces = [brand_percent, visual_percent, text_percent, adv_size_percent*10/100]
+    # caption_ad = XGBoost_utils.Caption_Generation(Image.fromarray(np.uint8(ad_image)))
+    # caption_context = XGBoost_utils.Caption_Generation(Image.fromarray(np.uint8(context_image)))
+    # ad_topic = XGBoost_utils.Topic_emb(caption_ad)
+    # ctpg_topic = XGBoost_utils.Topic_emb(caption_context)
+    ad_topic = np.random.randn(1,768)
+    ctpg_topic = np.random.randn(1,768)
+    ad = cv.resize(ad_image, (640, 832))
+    context = cv.resize(context_image, (640, 832))
     Gaze = Predict.Ad_Gaze_Prediction(input_ad_path=ad, input_ctpg_path=context, ad_location=ad_loc,
 demo = gr.Interface(
     fn=attention,
+    inputs=[gr.Markdown("""
+                        Instruction:
+                        1. Click to upload or drag the entire image that contains BOTH ad and its context;
+                        2. Draw bounding boxes in the order of:
+                        &nbsp;&nbsp;&nbsp;(a) Brand element(s)
+                        &nbsp;&nbsp;&nbsp;(b) Pictorial element(s)
+                        &nbsp;&nbsp;&nbsp;(c) Text element(s)
+                        &nbsp;&nbsp;&nbsp;(d) The advertisement.
+                        NOTE: Each ad element can have more than 1 boxes."""),
+            ImagePrompter(label="Upload Entire (Ad+Context) Image, and Draw Bounding Boxes", sources=['upload'], type="pil"),
             gr.Number(label="Number of brand bounding boxes drawn"),
             gr.Number(label="Number of pictorial bounding boxes drawn"),
             gr.Number(label="Number of text bounding boxes drawn"),
             gr.Textbox(label="Gaze Type", info="Enter Ad or Brand")
             ],
     outputs=[gr.Number(label="Predicted Gaze (sec)")],
+    title="Ad Gaze Prediction",
+    description="""This app accompanies: "Contextual Advertising with Theory-Informed Machine Learning", manuscript submitted to the Journal of Marketing.
+                   App Version: 1.0, Date: 10/24/2024.
+                   Warning: Due to computational efficiency, current version has not activated LLM generated ad topics. In future version, LLM topics will be activated in GPU environment.""",
     theme=gr.themes.Soft()
 )