# quran-colbert / app.py
# akhooli's picture
# Update app.py
# 3ff0003 verified
from ragatouille import RAGPretrainedModel
import os
import gradio as gr
from unicodedata import normalize
# Location of the index handed to RAGPretrainedModel.from_index; the path is
# relative, so it is resolved against the current working directory.
path_to_index = 'colbert/indexes/ArColbertQuran'
message = "waiting to load index ..."
# Bind RAG unconditionally so later code can test it for None instead of
# failing with a NameError when the index directory is absent.
RAG = None
if os.path.exists(path_to_index):
    RAG = RAGPretrainedModel.from_index(path_to_index)
    message = "index loaded!"
# Report at startup whether the index was found and loaded.
print(message)
import gradio as gr
def process_results(results) -> str:
    """Format retrieval results as one display string.

    Args:
        results: Iterable of mappings, each providing the keys
            ``'document_id'``, ``'document_metadata'`` and ``'content'``.

    Returns:
        One "Sura: ... Text: ..." entry per result, each followed by a blank
        line; an empty string when ``results`` is empty.

    Raises:
        KeyError: If a result lacks one of the three expected keys.
    """
    # Build the text in a single join rather than repeated `+=` concatenation.
    return "".join(
        f"Sura: {r['document_id']} ({r['document_metadata']}) \n Text:{r['content']}\n\n"
        for r in results
    )
k = 3  # How many documents to retrieve per query


def answer_fn(query):
    """Search the loaded index for ``query`` and return formatted results.

    Args:
        query: Free-text query string from the UI.

    Returns:
        The text produced by ``process_results`` for the top ``k`` hits, or a
        short explanatory message when the query is blank or no index has
        been loaded.
    """
    # Nothing to search for: answer directly instead of sending an empty
    # string to the retriever.
    if not query or not query.strip():
        return "Please enter a query."

    # RAG is only bound at module level when the index directory existed at
    # startup; look it up defensively so a missing index yields a readable
    # message rather than an exception.
    searcher = globals().get("RAG")
    if searcher is None:
        return "Index is not loaded; search is unavailable."

    # The query is NFKC-normalized before it is passed to the retriever.
    normalized_query = normalize('NFKC', query)
    results = searcher.search(query=normalized_query, k=k)
    return process_results(results)
# Gradio UI: one textbox in, one textbox out, wired to answer_fn.
qapp = gr.Interface(
    fn=answer_fn,
    inputs="textbox",
    outputs="textbox",
    # Sample queries offered in the UI.
    examples=[
        "ما أهمية كتابة المعاملات؟",
        "أخبرني عن عذاب الله للمنافقين",
        "حسن معاملة الوالدين",
        "ما معجزات سيدنا عيسى",
        "ما هو التطفيف",
        "ما قصة المؤمنين الذين قتلوا في الحفرة؟",
        "ما آداب اﻻستئذان؟",
        "النبي الذي تربى في بيت حاكم مصر",
    ],
    title="Qur'an Retrieval Demo - Semantic Search",
    # Adjacent literals with explicit trailing spaces: backslash line
    # continuations inside one literal fused the sentences together.
    description=(
        "A basic demo based on Arabic ColBERT (250k queries, normalized) and "
        "simple text of the Qur'an (also normalized). "
        "First query may take a minute, then much faster. "
        "Try to include relevant terms - this is just retrieval, not LLM chat "
        "and Qur'an is an edge case. "
        "For details, see: https://www.linkedin.com/posts/akhooli_arabic-1-million-curated-triplets-dataset-activity-7222951839774699521-PZcw"
    ),
)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    qapp.launch()