Aaron Snoswell committed on
Commit
e57bd03
·
1 Parent(s): ed5173f

Tidy up code-path switching between neutralise and diversify ranker algorithm modes

Browse files
Files changed (2) hide show
  1. my_web_app.py +6 -3
  2. reranker.py +39 -16
my_web_app.py CHANGED
@@ -1,9 +1,10 @@
1
 
2
  from flask import Flask, jsonify, request
3
  from flask_cors import CORS
4
- from reranker import rankingfunc
5
 
6
- VERSION = 1.2
 
 
7
 
8
  app = Flask(__name__)
9
  CORS(app)
@@ -22,6 +23,8 @@ def version():
22
  @app.route('/rank', methods=['POST'])
23
  def perform_ranking():
24
  post_data = request.json
 
 
25
  debug = request.args.get('debug', type=bool, default=False)
26
 
27
  # Get session details
@@ -33,7 +36,7 @@ def perform_ranking():
33
  current_time: The current time according to the user's browser, in UTC, in YYYY-MM-DD hh:mm:ss format.
34
  """
35
 
36
- results = rankingfunc(post_data, debug=debug)
37
 
38
  return jsonify(results)
39
 
 
1
 
2
  from flask import Flask, jsonify, request
3
  from flask_cors import CORS
 
4
 
5
+ from reranker import rankingfunc, RankingModes
6
+
7
+ VERSION = 1.3
8
 
9
  app = Flask(__name__)
10
  CORS(app)
 
23
  @app.route('/rank', methods=['POST'])
24
  def perform_ranking():
25
  post_data = request.json
26
+ mode = request.args.get('mode', type=str, default=RankingModes.DIVERSIFY)
27
+ k = request.args.get('k', type=int, default=10)
28
  debug = request.args.get('debug', type=bool, default=False)
29
 
30
  # Get session details
 
36
  current_time: The current time according to the user's browser, in UTC, in YYYY-MM-DD hh:mm:ss format.
37
  """
38
 
39
+ results = rankingfunc(post_data, k=k, mode=mode, debug=debug)
40
 
41
  return jsonify(results)
42
 
reranker.py CHANGED
@@ -1,24 +1,27 @@
 
1
  import os
2
- import requests
 
 
3
  import numpy as np
4
- from utils import *
5
- from numpy.linalg import norm
6
- from scipy.stats import rankdata
 
7
  from sklearn.utils.extmath import softmax
8
  from sentence_transformers import SentenceTransformer
9
- from copy import deepcopy
10
 
11
- import torch
12
 
13
- import torch.nn.functional as F
14
 
15
- #environment setup for HF docker image
16
  try:
17
  os.mkdir('./cache')
18
  except FileExistsError:
19
  # Use existing cache dir
20
  pass
21
 
 
22
  # Create embeddings from example texts
23
  # Guessing which environ var is correct
24
  os.environ['SENTENCE_TRANSFORMERS_HOME'] = './cache'
@@ -44,13 +47,17 @@ TARGET_DISTRIBUTION = [0.5, 0.5]
44
  # Controls the weight of the initial relevance score (0: ignore initial score, 1: only uses initial score)
45
  LAMBDA = 0.5
46
 
47
- #True to run diversification
48
- DIVERSIFY = True
 
 
 
49
 
50
  def fairScore(prob_scores:list, target:list) -> float:
51
  similarity = 1 - get_jsd_distance(prob_scores, target)
52
  return similarity
53
-
 
54
  def diversify(candidates: list, candidates_representation: dict, target: list) -> dict:
55
  accumulator = np.zeros(len(target))
56
  remaining = candidates.copy()
@@ -78,13 +85,16 @@ def diversify(candidates: list, candidates_representation: dict, target: list) -
78
  return diversified
79
 
80
 
81
- def rankingfunc(inputJSON: dict, debug: bool = False, k: int = 10) -> dict:
82
  '''
83
  Rank a set of social media posts using our ranking algorithm
84
 
85
  Inputs:
86
  inputJSON (dict): JSON dict from the web browser plugin, following the
87
  provided competition spec at https://github.com/HumanCompatibleAI/ranking-challenge
 
 
 
88
  debug (bool): If set, will also return extra debugging info in the return struct
89
 
90
  Returns:
@@ -92,9 +102,16 @@ def rankingfunc(inputJSON: dict, debug: bool = False, k: int = 10) -> dict:
92
  provided competition spec at https://github.com/HumanCompatibleAI/ranking-challenge
93
  '''
94
 
 
 
 
95
  # Extract text documents and get embeddings
96
  candidates = inputJSON['items']
97
- k = min(k, len(candidates))
 
 
 
 
98
  if (debug):
99
  print("Reranking top ", k)
100
 
@@ -128,7 +145,7 @@ def rankingfunc(inputJSON: dict, debug: bool = False, k: int = 10) -> dict:
128
  print(initial_scores)
129
 
130
  diversity_scores = []
131
- if (DIVERSIFY):
132
  diversity_scores = diversify(candidates, candidates_representation, TARGET_DISTRIBUTION)
133
 
134
  for index in range(len(candidates)):
@@ -138,12 +155,16 @@ def rankingfunc(inputJSON: dict, debug: bool = False, k: int = 10) -> dict:
138
  source = [(lw_cs[index] + 1.0) * 0.5, (rw_cs[index] + 1.0) * 0.5]
139
  source = F.softmax(torch.stack(source), dim=0)
140
  fairness = 0
141
- if (DIVERSIFY):
 
142
  # Diversification:
143
  fairness = diversity_scores[candidates[index]['id']]
144
- else:
145
  # Neutralization:
146
  fairness = fairScore(source, TARGET_DISTRIBUTION)
 
 
 
147
  new_score = linearCombination(relevance, fairness, LAMBDA)
148
  candidates[index]['score'] = new_score
149
 
@@ -161,6 +182,8 @@ def rankingfunc(inputJSON: dict, debug: bool = False, k: int = 10) -> dict:
161
 
162
  final_ranking = reranked_ids
163
 
 
 
164
  output_results = {
165
  "ranked_ids": final_ranking,
166
  "new_items": []
 
1
+
2
  import os
3
+ import torch
4
+ import warnings
5
+
6
  import numpy as np
7
+ import torch.nn.functional as F
8
+
9
+ from enum import Enum
10
+ from copy import deepcopy
11
  from sklearn.utils.extmath import softmax
12
  from sentence_transformers import SentenceTransformer
 
13
 
14
+ from utils import *
15
 
 
16
 
17
+ # Environment setup for HF docker image
18
  try:
19
  os.mkdir('./cache')
20
  except FileExistsError:
21
  # Use existing cache dir
22
  pass
23
 
24
+
25
  # Create embeddings from example texts
26
  # Guessing which environ var is correct
27
  os.environ['SENTENCE_TRANSFORMERS_HOME'] = './cache'
 
47
  # Controls the weight of the initial relevance score (0: ignore initial score, 1: only uses initial score)
48
  LAMBDA = 0.5
49
 
50
+ # The different modes our ranking algorithm can run in
51
+ class RankingModes(Enum):
52
+ DIVERSIFY = "diversify"
53
+ NEUTRALISE = "neutralise"
54
+
55
 
56
  def fairScore(prob_scores:list, target:list) -> float:
57
  similarity = 1 - get_jsd_distance(prob_scores, target)
58
  return similarity
59
+
60
+
61
  def diversify(candidates: list, candidates_representation: dict, target: list) -> dict:
62
  accumulator = np.zeros(len(target))
63
  remaining = candidates.copy()
 
85
  return diversified
86
 
87
 
88
+ def rankingfunc(inputJSON: dict, k: int = 10, mode: str = RankingModes.DIVERSIFY, debug: bool = False) -> dict:
89
  '''
90
  Rank a set of social media posts using our ranking algorithm
91
 
92
  Inputs:
93
  inputJSON (dict): JSON dict from the web browser plugin, following the
94
  provided competition spec at https://github.com/HumanCompatibleAI/ranking-challenge
95
+ k (int): We only mess with the ranking of the first k items in the feed, to avoid
96
+ unduly reducing engagement.
97
+ mode (str): The ranker algorithm mode. Options include 'diversify' or 'neutralise'.
98
  debug (bool): If set, will also return extra debugging info in the return struct
99
 
100
  Returns:
 
102
  provided competition spec at https://github.com/HumanCompatibleAI/ranking-challenge
103
  '''
104
 
105
+ assert k > 0, "k must be a positive integer greater than 0, but was {k}"
106
+ assert mode in RankingModes, f"mode must be in {RankingModes}, but was {mode}"
107
+
108
  # Extract text documents and get embeddings
109
  candidates = inputJSON['items']
110
+
111
+ if len(candidates) < k:
112
+ warnings.warn(f"k truncated from {k} to {len(candidates)} due to only that many posts being passed")
113
+ k = min(k, len(candidates))
114
+
115
  if (debug):
116
  print("Reranking top ", k)
117
 
 
145
  print(initial_scores)
146
 
147
  diversity_scores = []
148
+ if mode == RankingModes.DIVERSIFY:
149
  diversity_scores = diversify(candidates, candidates_representation, TARGET_DISTRIBUTION)
150
 
151
  for index in range(len(candidates)):
 
155
  source = [(lw_cs[index] + 1.0) * 0.5, (rw_cs[index] + 1.0) * 0.5]
156
  source = F.softmax(torch.stack(source), dim=0)
157
  fairness = 0
158
+
159
+ if mode == RankingModes.DIVERSIFY:
160
  # Diversification:
161
  fairness = diversity_scores[candidates[index]['id']]
162
+ elif mode == RankingModes.NEUTRALISE:
163
  # Neutralization:
164
  fairness = fairScore(source, TARGET_DISTRIBUTION)
165
+ else:
166
+ raise ValueError(f"Unknown ranking algorithm mode: {mode}")
167
+
168
  new_score = linearCombination(relevance, fairness, LAMBDA)
169
  candidates[index]['score'] = new_score
170
 
 
182
 
183
  final_ranking = reranked_ids
184
 
185
+ # TODO ajs 15/Apr/2024 Find a way to source high-quality out-of-feed posts, then incorporate them into the fusion algorithm
186
+
187
  output_results = {
188
  "ranked_ids": final_ranking,
189
  "new_items": []