# Page header captured with the file (not part of the module):
# Spaces: Running | File size: 2,068 Bytes | 613c93d
from typing import List
import numpy as np
import streamlit as st
from src.models import Chapter
class Reranker:
    """Base reranker whose default behaviour leaves the chapter order untouched."""

    def rerank(self, chapters: List[Chapter]) -> List[Chapter]:
        """Return ``chapters`` exactly as received, without reordering.

        Subclasses override this method to impose their own ordering.
        """
        # TODO: no base ranking strategy has been implemented yet.
        return chapters
# Rerankers applicable to SemanticRetriever results
def sort_chapters(chapters, scores):
    """Order chapters from the highest score to the lowest.

    Chapters and scores are paired positionally; pairing stops at the shorter
    of the two inputs. Chapters with equal scores keep their relative input
    order because the sort is stable.

    Returns:
        A new list containing the chapters in descending score order.
    """
    scored_pairs = list(zip(chapters, scores))
    scored_pairs.sort(key=lambda pair: pair[1], reverse=True)
    return [chapter for chapter, _score in scored_pairs]
class CombinedScoreAndNumberReranker(Reranker):
    """Rank chapters by a weighted sum of two per-chapter signals.

    The signals are the normalised highlight-verse count returned by
    ``compute_num_verse_scores`` and the best verse score returned by
    ``compute_sem_sim_scores``.
    """

    def __init__(self, num_verse_weight=0.3, semantic_sim_weight=0.7):
        """Store the weights applied to each signal.

        Args:
            num_verse_weight: Multiplier for the highlight-verse-count score.
            semantic_sim_weight: Multiplier for the semantic-similarity score.
        """
        self.num_verse_weight = num_verse_weight
        self.semantic_sim_weight = semantic_sim_weight

    def rerank(self, chapters: List[Chapter]) -> List[Chapter]:
        """Return the chapters sorted by descending combined score.

        Args:
            chapters: Chapters to reorder.

        Returns:
            A new list holding the same chapters, best combined score first.
            An empty input yields an empty list.
        """
        # Nothing to rank: skip scoring entirely so the scoring helpers are
        # never asked to normalise an empty batch.
        if len(chapters) == 0:
            return []

        num_verse_scores = compute_num_verse_scores(chapters)
        sem_sim_scores = compute_sem_sim_scores(chapters)
        final_scores = (
            self.num_verse_weight * num_verse_scores
            + self.semantic_sim_weight * sem_sim_scores
        )
        return sort_chapters(chapters, final_scores)
class SemanticSimScoreReranker(Reranker):
    """Rank chapters by the highest ``score`` among their highlight verses."""

    def rerank(self, chapters: List[Chapter]) -> List[Chapter]:
        """Return the chapters sorted by descending best verse score.

        Args:
            chapters: Chapters to reorder; each is read through
                ``highlight_verses_df["score"]``.

        Returns:
            A new list holding the same chapters, highest score first.
        """
        # Reuse the module-level helper rather than duplicating the
        # per-chapter ``highlight_verses_df["score"].max()`` extraction.
        return sort_chapters(chapters, compute_sem_sim_scores(chapters))
class MaxVerseReranker(Reranker):
    """Rank chapters by how many unique highlight verses each one reports."""

    def rerank(self, chapters: List[Chapter]) -> List[Chapter]:
        """Return the chapters sorted by descending unique-highlight-verse count.

        The count for each chapter comes from its
        ``get_num_unique_highlight_verse()`` method.
        """
        return sort_chapters(
            chapters,
            [entry.get_num_unique_highlight_verse() for entry in chapters],
        )
def compute_num_verse_scores(chapters):
    """Return each chapter's unique highlight-verse count scaled by the maximum.

    Args:
        chapters: Objects exposing ``get_num_unique_highlight_verse()``.

    Returns:
        A float NumPy array with one entry per chapter, equal to that
        chapter's count divided by the largest count. An empty input gives an
        empty array, and if the largest count is zero every score is 0.0.
    """
    num_verses = np.array(
        [chapter.get_num_unique_highlight_verse() for chapter in chapters],
        dtype=float,
    )
    # Taking the maximum of an empty sequence raises ValueError, so return
    # the (empty) array directly.
    if num_verses.size == 0:
        return num_verses

    max_verses = num_verses.max()
    # Dividing by a zero maximum would yield NaN scores, which cannot be
    # ordered meaningfully; treat "no highlights anywhere" as all-zero.
    if max_verses == 0:
        return np.zeros_like(num_verses)

    return num_verses / max_verses
def compute_sem_sim_scores(chapters):
    """Collect, per chapter, the largest value under ``highlight_verses_df["score"]``.

    Returns:
        A NumPy array with one maximum score per chapter, in input order.
    """
    best_scores = []
    for chapter in chapters:
        verse_scores = chapter.highlight_verses_df["score"]
        best_scores.append(verse_scores.max())
    return np.array(best_scores)
|