Spaces:
Runtime error
Runtime error
Aaron Snoswell
committed on
Commit
·
e57bd03
1
Parent(s):
ed5173f
Tidy up code-path switching between neutralise and diversify ranker algorithm modes
Browse files- my_web_app.py +6 -3
- reranker.py +39 -16
my_web_app.py
CHANGED
@@ -1,9 +1,10 @@
|
|
1 |
|
2 |
from flask import Flask, jsonify, request
|
3 |
from flask_cors import CORS
|
4 |
-
from reranker import rankingfunc
|
5 |
|
6 |
-
|
|
|
|
|
7 |
|
8 |
app = Flask(__name__)
|
9 |
CORS(app)
|
@@ -22,6 +23,8 @@ def version():
|
|
22 |
@app.route('/rank', methods=['POST'])
|
23 |
def perform_ranking():
|
24 |
post_data = request.json
|
|
|
|
|
25 |
debug = request.args.get('debug', type=bool, default=False)
|
26 |
|
27 |
# Get session details
|
@@ -33,7 +36,7 @@ def perform_ranking():
|
|
33 |
current_time: The current time according to the user's browser, in UTC, in YYYY-MM-DD hh:mm:ss format.
|
34 |
"""
|
35 |
|
36 |
-
results = rankingfunc(post_data, debug=debug)
|
37 |
|
38 |
return jsonify(results)
|
39 |
|
|
|
1 |
|
2 |
from flask import Flask, jsonify, request
|
3 |
from flask_cors import CORS
|
|
|
4 |
|
5 |
+
from reranker import rankingfunc, RankingModes
|
6 |
+
|
7 |
+
VERSION = 1.3
|
8 |
|
9 |
app = Flask(__name__)
|
10 |
CORS(app)
|
|
|
23 |
@app.route('/rank', methods=['POST'])
|
24 |
def perform_ranking():
|
25 |
post_data = request.json
|
26 |
+
mode = request.args.get('mode', type=str, default=RankingModes.DIVERSIFY)
|
27 |
+
k = request.args.get('k', type=int, default=10)
|
28 |
debug = request.args.get('debug', type=bool, default=False)
|
29 |
|
30 |
# Get session details
|
|
|
36 |
current_time: The current time according to the user's browser, in UTC, in YYYY-MM-DD hh:mm:ss format.
|
37 |
"""
|
38 |
|
39 |
+
results = rankingfunc(post_data, k=k, mode=mode, debug=debug)
|
40 |
|
41 |
return jsonify(results)
|
42 |
|
reranker.py
CHANGED
@@ -1,24 +1,27 @@
|
|
|
|
1 |
import os
|
2 |
-
import
|
|
|
|
|
3 |
import numpy as np
|
4 |
-
|
5 |
-
|
6 |
-
from
|
|
|
7 |
from sklearn.utils.extmath import softmax
|
8 |
from sentence_transformers import SentenceTransformer
|
9 |
-
from copy import deepcopy
|
10 |
|
11 |
-
import
|
12 |
|
13 |
-
import torch.nn.functional as F
|
14 |
|
15 |
-
#
|
16 |
try:
|
17 |
os.mkdir('./cache')
|
18 |
except FileExistsError:
|
19 |
# Use existing cache dir
|
20 |
pass
|
21 |
|
|
|
22 |
# Create embeddings from example texts
|
23 |
# Guessing which environ var is correct
|
24 |
os.environ['SENTENCE_TRANSFORMERS_HOME'] = './cache'
|
@@ -44,13 +47,17 @@ TARGET_DISTRIBUTION = [0.5, 0.5]
|
|
44 |
# Controls the weight of the initial relevance score (0: ignore initial score, 1: only uses initial score)
|
45 |
LAMBDA = 0.5
|
46 |
|
47 |
-
#
|
48 |
-
|
|
|
|
|
|
|
49 |
|
50 |
def fairScore(prob_scores: list, target: list) -> float:
    """
    Score how close a distribution is to the target distribution.

    Inputs:
        prob_scores (list): The distribution to evaluate.
        target (list): The distribution to compare against.

    Returns:
        1 minus get_jsd_distance(prob_scores, target).
        NOTE(review): presumably a Jensen-Shannon distance, so a larger
        result means a closer match - confirm against get_jsd_distance.
    """
    return 1 - get_jsd_distance(prob_scores, target)
|
53 |
-
|
|
|
54 |
def diversify(candidates: list, candidates_representation: dict, target: list) -> dict:
|
55 |
accumulator = np.zeros(len(target))
|
56 |
remaining = candidates.copy()
|
@@ -78,13 +85,16 @@ def diversify(candidates: list, candidates_representation: dict, target: list) -
|
|
78 |
return diversified
|
79 |
|
80 |
|
81 |
-
def rankingfunc(inputJSON: dict,
|
82 |
'''
|
83 |
Rank a set of social media posts using our ranking algorithm
|
84 |
|
85 |
Inputs:
|
86 |
inputJSON (dict): JSON dict from the web browser plugin, following the
|
87 |
provided competition spec at https://github.com/HumanCompatibleAI/ranking-challenge
|
|
|
|
|
|
|
88 |
debug (bool): If set, will also return extra debugging info in the return struct
|
89 |
|
90 |
Returns:
|
@@ -92,9 +102,16 @@ def rankingfunc(inputJSON: dict, debug: bool = False, k: int = 10) -> dict:
|
|
92 |
provided competition spec at https://github.com/HumanCompatibleAI/ranking-challenge
|
93 |
'''
|
94 |
|
|
|
|
|
|
|
95 |
# Extract text documents and get embeddings
|
96 |
candidates = inputJSON['items']
|
97 |
-
|
|
|
|
|
|
|
|
|
98 |
if (debug):
|
99 |
print("Reranking top ", k)
|
100 |
|
@@ -128,7 +145,7 @@ def rankingfunc(inputJSON: dict, debug: bool = False, k: int = 10) -> dict:
|
|
128 |
print(initial_scores)
|
129 |
|
130 |
diversity_scores = []
|
131 |
-
if
|
132 |
diversity_scores = diversify(candidates, candidates_representation, TARGET_DISTRIBUTION)
|
133 |
|
134 |
for index in range(len(candidates)):
|
@@ -138,12 +155,16 @@ def rankingfunc(inputJSON: dict, debug: bool = False, k: int = 10) -> dict:
|
|
138 |
source = [(lw_cs[index] + 1.0) * 0.5, (rw_cs[index] + 1.0) * 0.5]
|
139 |
source = F.softmax(torch.stack(source), dim=0)
|
140 |
fairness = 0
|
141 |
-
|
|
|
142 |
# Diversification:
|
143 |
fairness = diversity_scores[candidates[index]['id']]
|
144 |
-
|
145 |
# Neutralization:
|
146 |
fairness = fairScore(source, TARGET_DISTRIBUTION)
|
|
|
|
|
|
|
147 |
new_score = linearCombination(relevance, fairness, LAMBDA)
|
148 |
candidates[index]['score'] = new_score
|
149 |
|
@@ -161,6 +182,8 @@ def rankingfunc(inputJSON: dict, debug: bool = False, k: int = 10) -> dict:
|
|
161 |
|
162 |
final_ranking = reranked_ids
|
163 |
|
|
|
|
|
164 |
output_results = {
|
165 |
"ranked_ids": final_ranking,
|
166 |
"new_items": []
|
|
|
1 |
+
|
2 |
import os
|
3 |
+
import torch
|
4 |
+
import warnings
|
5 |
+
|
6 |
import numpy as np
|
7 |
+
import torch.nn.functional as F
|
8 |
+
|
9 |
+
from enum import Enum
|
10 |
+
from copy import deepcopy
|
11 |
from sklearn.utils.extmath import softmax
|
12 |
from sentence_transformers import SentenceTransformer
|
|
|
13 |
|
14 |
+
from utils import *
|
15 |
|
|
|
16 |
|
17 |
+
# Environment setup for HF docker image
|
18 |
try:
|
19 |
os.mkdir('./cache')
|
20 |
except FileExistsError:
|
21 |
# Use existing cache dir
|
22 |
pass
|
23 |
|
24 |
+
|
25 |
# Create embeddings from example texts
|
26 |
# Guessing which environ var is correct
|
27 |
os.environ['SENTENCE_TRANSFORMERS_HOME'] = './cache'
|
|
|
47 |
# Controls the weight of the initial relevance score (0: ignore initial score, 1: only uses initial score)
|
48 |
LAMBDA = 0.5
|
49 |
|
50 |
+
# The different modes our ranking algorithm can run in
|
51 |
+
class RankingModes(str, Enum):
    """
    The different modes our ranking algorithm can run in.

    Members also subclass str, so each member compares equal to its plain
    string value (RankingModes.DIVERSIFY == "diversify"). This matters because
    the mode can arrive as a raw string (e.g. from a URL query parameter) and
    is then compared against these members with ==; a plain Enum member would
    never compare equal to that string.
    """

    # Score each post via the diversification pass
    DIVERSIFY = "diversify"
    # Score each post by its closeness to the target distribution
    NEUTRALISE = "neutralise"
|
54 |
+
|
55 |
|
56 |
def fairScore(prob_scores: list, target: list) -> float:
    """
    Score how close a distribution is to the target distribution.

    Inputs:
        prob_scores (list): The distribution to evaluate.
        target (list): The distribution to compare against.

    Returns:
        1 minus get_jsd_distance(prob_scores, target).
        NOTE(review): presumably a Jensen-Shannon distance, so a larger
        result means a closer match - confirm against get_jsd_distance.
    """
    return 1 - get_jsd_distance(prob_scores, target)
|
59 |
+
|
60 |
+
|
61 |
def diversify(candidates: list, candidates_representation: dict, target: list) -> dict:
|
62 |
accumulator = np.zeros(len(target))
|
63 |
remaining = candidates.copy()
|
|
|
85 |
return diversified
|
86 |
|
87 |
|
88 |
+
def rankingfunc(inputJSON: dict, k: int = 10, mode: str = RankingModes.DIVERSIFY, debug: bool = False) -> dict:
|
89 |
'''
|
90 |
Rank a set of social media posts using our ranking algorithm
|
91 |
|
92 |
Inputs:
|
93 |
inputJSON (dict): JSON dict from the web browser plugin, following the
|
94 |
provided competition spec at https://github.com/HumanCompatibleAI/ranking-challenge
|
95 |
+
k (int): We only mess with the ranking of the first k items in the feed, to avoid
|
96 |
+
unduly reducing engagement.
|
97 |
+
mode (str): The ranker algorithm mode. Options include 'diversify' or 'neutralise'.
|
98 |
debug (bool): If set, will also return extra debugging info in the return struct
|
99 |
|
100 |
Returns:
|
|
|
102 |
provided competition spec at https://github.com/HumanCompatibleAI/ranking-challenge
|
103 |
'''
|
104 |
|
105 |
+
assert k > 0, "k must be a positive integer greater than 0, but was {k}"
|
106 |
+
assert mode in RankingModes, f"mode must be in {RankingModes}, but was {mode}"
|
107 |
+
|
108 |
# Extract text documents and get embeddings
|
109 |
candidates = inputJSON['items']
|
110 |
+
|
111 |
+
if len(candidates) < k:
|
112 |
+
warnings.warn(f"k truncated from {k} to {len(candidates)} due to only that many posts being passed")
|
113 |
+
k = min(k, len(candidates))
|
114 |
+
|
115 |
if (debug):
|
116 |
print("Reranking top ", k)
|
117 |
|
|
|
145 |
print(initial_scores)
|
146 |
|
147 |
diversity_scores = []
|
148 |
+
if mode == RankingModes.DIVERSIFY:
|
149 |
diversity_scores = diversify(candidates, candidates_representation, TARGET_DISTRIBUTION)
|
150 |
|
151 |
for index in range(len(candidates)):
|
|
|
155 |
source = [(lw_cs[index] + 1.0) * 0.5, (rw_cs[index] + 1.0) * 0.5]
|
156 |
source = F.softmax(torch.stack(source), dim=0)
|
157 |
fairness = 0
|
158 |
+
|
159 |
+
if mode == RankingModes.DIVERSIFY:
|
160 |
# Diversification:
|
161 |
fairness = diversity_scores[candidates[index]['id']]
|
162 |
+
elif mode == RankingModes.NEUTRALISE:
|
163 |
# Neutralization:
|
164 |
fairness = fairScore(source, TARGET_DISTRIBUTION)
|
165 |
+
else:
|
166 |
+
raise ValueError(f"Unknown ranking algorithm mode: {mode}")
|
167 |
+
|
168 |
new_score = linearCombination(relevance, fairness, LAMBDA)
|
169 |
candidates[index]['score'] = new_score
|
170 |
|
|
|
182 |
|
183 |
final_ranking = reranked_ids
|
184 |
|
185 |
+
# TODO ajs 15/Apr/2024 Find a way to source high-quality out-of-feed posts, then incorporate them into the fusion algorithm
|
186 |
+
|
187 |
output_results = {
|
188 |
"ranked_ids": final_ranking,
|
189 |
"new_items": []
|