bible-search / src /reranker.py
alronlam's picture
Add app and data files
613c93d
raw
history blame
2.07 kB
from typing import List
import numpy as np
import streamlit as st
from src.models import Chapter
class Reranker:
    """Base class for chapter rerankers.

    Subclasses override :meth:`rerank` to reorder the chapters they are
    given. The base implementation performs no reordering.
    """

    def rerank(self, chapters: List[Chapter]) -> List[Chapter]:
        """Return ``chapters`` as received (same list object, same order)."""
        # TODO: no reranking logic is implemented at this level yet.
        return chapters
# Rerankers applicable to SemanticRetriever results
def sort_chapters(chapters, scores):
    """Return the chapters ordered from highest score to lowest.

    ``chapters`` and ``scores`` are paired positionally; if one is longer
    than the other, the unpaired tail is dropped. Chapters with equal scores
    keep their original relative order because the sort is stable.
    """

    def score_of(pair):
        return pair[1]

    ranked_pairs = list(zip(chapters, scores))
    ranked_pairs.sort(key=score_of, reverse=True)
    return [chapter for chapter, _score in ranked_pairs]
class CombinedScoreAndNumberReranker(Reranker):
    """Rank chapters by a weighted sum of two per-chapter signals.

    The signals are the values produced by ``compute_num_verse_scores`` and
    ``compute_sem_sim_scores``; each is multiplied by its weight and the two
    products are added to obtain the final ranking score.
    """

    def __init__(self, num_verse_weight=0.3, semantic_sim_weight=0.7):
        """Store the weight applied to each signal.

        Args:
            num_verse_weight: Multiplier for the verse-count score.
            semantic_sim_weight: Multiplier for the semantic-similarity score.
        """
        self.num_verse_weight = num_verse_weight
        self.semantic_sim_weight = semantic_sim_weight

    def rerank(self, chapters: List[Chapter]) -> List[Chapter]:
        """Return ``chapters`` sorted by descending combined score."""
        verse_component = self.num_verse_weight * compute_num_verse_scores(chapters)
        similarity_component = self.semantic_sim_weight * compute_sem_sim_scores(
            chapters
        )
        return sort_chapters(chapters, verse_component + similarity_component)
class SemanticSimScoreReranker(Reranker):
    """Rank chapters by the largest ``"score"`` among their highlight verses."""

    def rerank(self, chapters: List[Chapter]) -> List[Chapter]:
        """Return ``chapters`` sorted by descending maximum verse score.

        Each chapter's score is the maximum of the ``"score"`` column of its
        ``highlight_verses_df``.
        """
        best_per_chapter = [c.highlight_verses_df["score"].max() for c in chapters]
        return sort_chapters(chapters, np.array(best_per_chapter))
class MaxVerseReranker(Reranker):
    """Rank chapters by their ``get_num_unique_highlight_verse()`` value."""

    def rerank(self, chapters: List[Chapter]) -> List[Chapter]:
        """Return ``chapters`` sorted by descending highlight-verse count."""
        verse_counts = []
        for chapter in chapters:
            verse_counts.append(chapter.get_num_unique_highlight_verse())
        return sort_chapters(chapters, verse_counts)
def compute_num_verse_scores(chapters):
    """Return each chapter's highlight-verse count scaled by the largest count.

    Args:
        chapters: Iterable of objects exposing
            ``get_num_unique_highlight_verse()``.

    Returns:
        A float ``numpy`` array with one entry per chapter. The chapter with
        the most highlight verses scores 1.0 and the rest are proportional.
        An empty input yields an empty array, and when no chapter has any
        highlight verse every score is 0.0.
    """
    num_verses = np.array(
        [chapter.get_num_unique_highlight_verse() for chapter in chapters],
        dtype=float,
    )
    # Nothing to normalise; taking the max of an empty array would raise.
    if num_verses.size == 0:
        return num_verses

    max_verses = num_verses.max()
    # Avoid 0/0 -> NaN, which would make the downstream sort order undefined.
    if max_verses <= 0:
        return np.zeros_like(num_verses)

    return num_verses / max_verses
def compute_sem_sim_scores(chapters):
    """Return the maximum highlight-verse ``"score"`` for each chapter.

    For every chapter, the ``"score"`` column of its ``highlight_verses_df``
    is reduced with ``.max()``; the results are returned as a ``numpy`` array
    in the same order as ``chapters``.
    """
    best_scores = []
    for chapter in chapters:
        verse_scores = chapter.highlight_verses_df["score"]
        best_scores.append(verse_scores.max())
    return np.array(best_scores)